DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.cpp
1 //==-------- handler.cpp --- SYCL command group handler --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <algorithm>
10 
11 #include <detail/config.hpp>
12 #include <detail/global_handler.hpp>
13 #include <detail/graph_impl.hpp>
14 #include <detail/handler_impl.hpp>
15 #include <detail/image_impl.hpp>
16 #include <detail/kernel_bundle_impl.hpp>
17 #include <detail/kernel_impl.hpp>
18 #include <detail/queue_impl.hpp>
19 #include <detail/scheduler/commands.hpp>
20 #include <detail/scheduler/scheduler.hpp>
21 #include <detail/usm/usm_impl.hpp>
22 #include <sycl/detail/common.hpp>
23 #include <sycl/detail/helpers.hpp>
24 #include <sycl/detail/kernel_desc.hpp>
25 #include <sycl/detail/pi.h>
26 #include <sycl/detail/pi.hpp>
27 #include <sycl/event.hpp>
28 #include <sycl/handler.hpp>
29 #include <sycl/info/info_desc.hpp>
30 #include <sycl/stream.hpp>
31 
34 
35 namespace sycl {
36 inline namespace _V1 {
37 
38 namespace detail {
39 
40 bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) {
41  DeviceGlobalMapEntry *DGEntry =
42  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
43  DeviceGlobalPtr);
44  return DGEntry && !DGEntry->MImageIdentifiers.empty();
45 }
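// [Editor's note - illustrative sketch, not part of handler.cpp] This helper
// reports whether any device image actually references a given device_global,
// i.e. whether some kernel uses it. Hypothetical user code that would make it
// return true (names are examples only):
//
//   #include <sycl/sycl.hpp>
//   namespace exp = sycl::ext::oneapi::experimental;
//   static exp::device_global<int> DGVar;
//
//   void submit(sycl::queue &Q) {
//     Q.single_task([=] { DGVar = 42; }).wait(); // the kernel references
//   }                                            // DGVar, so
//                                                // isDeviceGlobalUsedInKernel(&DGVar)
//                                                // returns true.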
46 
47 static sycl::detail::pi::PiImageCopyFlags
48 getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType) {
49  if (DstPtrType == sycl::usm::alloc::device) {
50  // Dest is on device
51  if (SrcPtrType == sycl::usm::alloc::device)
52  return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE;
53  if (SrcPtrType == sycl::usm::alloc::host ||
54  SrcPtrType == sycl::usm::alloc::unknown)
55  return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
56  throw sycl::exception(make_error_code(errc::invalid),
57  "Unknown copy source location");
58  }
59  if (DstPtrType == sycl::usm::alloc::host ||
60  DstPtrType == sycl::usm::alloc::unknown) {
61  // Dest is on host
62  if (SrcPtrType == sycl::usm::alloc::device)
63  return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
64  if (SrcPtrType == sycl::usm::alloc::host ||
65  SrcPtrType == sycl::usm::alloc::unknown)
66  throw sycl::exception(make_error_code(errc::invalid),
67  "Cannot copy image from host to host");
68  throw sycl::exception(make_error_code(errc::invalid),
69  "Unknown copy source location");
70  }
71  throw sycl::exception(make_error_code(errc::invalid),
72  "Unknown copy destination location");
73 }
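// [Editor's note - illustrative sketch] The copy direction is derived purely
// from where the two USM pointers live; callers later in this file do, in
// effect:
//
//   auto Flags = detail::getPiImageCopyFlags(
//       get_pointer_type(Src, Ctx),    // e.g. usm::alloc::host
//       get_pointer_type(Dest, Ctx));  // e.g. usm::alloc::device
//
//   // host/unknown -> device : PI_IMAGE_COPY_HOST_TO_DEVICE
//   // device       -> host   : PI_IMAGE_COPY_DEVICE_TO_HOST
//   // device       -> device : PI_IMAGE_COPY_DEVICE_TO_DEVICE
//   // host         -> host   : exception (not a device image copy)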
74 
75 void *getValueFromDynamicParameter(
76  ext::oneapi::experimental::detail::dynamic_parameter_base
77  &DynamicParamBase) {
78  return sycl::detail::getSyclObjImpl(DynamicParamBase)->getValue();
79 }
80 
81 } // namespace detail
82 
83 handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
84  : handler(Queue, Queue, nullptr, IsHost) {}
85 
86 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
87  std::shared_ptr<detail::queue_impl> PrimaryQueue,
88  std::shared_ptr<detail::queue_impl> SecondaryQueue,
89  bool IsHost)
90  : MImpl(std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
91  std::move(SecondaryQueue))),
92  MQueue(std::move(Queue)), MIsHost(IsHost) {}
93 
94 handler::handler(
95  std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
96  : MImpl(std::make_shared<detail::handler_impl>()), MGraph(Graph) {}
97 
98 // Sets the submission state to indicate that an explicit kernel bundle has been
99 // set. Throws a sycl::exception with errc::invalid if the current state
100 // indicates that a specialization constant has been set.
101 void handler::setStateExplicitKernelBundle() {
102  MImpl->setStateExplicitKernelBundle();
103 }
104 
105 // Sets the submission state to indicate that a specialization constant has been
106 // set. Throws a sycl::exception with errc::invalid if the current state
107 // indicates that an explicit kernel bundle has been set.
108 void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }
109 
110 // Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
111 // false otherwise.
112 bool handler::isStateExplicitKernelBundle() const {
113  return MImpl->isStateExplicitKernelBundle();
114 }
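// [Editor's note - illustrative sketch] The two submission states are
// mutually exclusive; mixing an explicit kernel bundle with specialization
// constants in one command group throws errc::invalid. Hypothetical user
// code hitting this (ExecBundle, SpecId and K are examples only):
//
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.use_kernel_bundle(ExecBundle);          // explicit-bundle state
//     CGH.set_specialization_constant<SpecId>(1); // throws: conflicting state
//     CGH.single_task<K>([] {});
//   });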
115 
116 // Returns a shared_ptr to the kernel_bundle.
117 // If there is no kernel_bundle created:
118 // returns newly created kernel_bundle if Insert is true
119 // returns shared_ptr(nullptr) if Insert is false
120 std::shared_ptr<detail::kernel_bundle_impl>
121 handler::getOrInsertHandlerKernelBundle(bool Insert) const {
122  if (!MImpl->MKernelBundle && Insert) {
123  auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
124  auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
125  MImpl->MKernelBundle = detail::getSyclObjImpl(
126  get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
127  }
128  return MImpl->MKernelBundle;
129 }
130 
131 // Sets kernel bundle to the provided one.
132 void handler::setHandlerKernelBundle(
133  const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
134  MImpl->MKernelBundle = NewKernelBundleImpPtr;
135 }
136 
137 void handler::setHandlerKernelBundle(kernel Kernel) {
138  // Kernel may not have an associated kernel bundle if it is created from a
139  // program. As such, apply getSyclObjImpl directly on the kernel, i.e. not
140  // the other way around: getSyclObjImpl(Kernel->get_kernel_bundle()).
141  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
142  detail::getSyclObjImpl(Kernel)->get_kernel_bundle();
143  setHandlerKernelBundle(KernelBundleImpl);
144 }
145 
146 event handler::finalize() {
147  // This block of code is needed only for reduction implementation.
148  // It is harmless (does nothing) for everything else.
149  if (MIsFinalized)
150  return MLastEvent;
151  MIsFinalized = true;
152 
153  // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed
154  // to a command without being bound to a command group, an exception should
155  // be thrown.
156  {
157  for (const auto &arg : MArgs) {
158  if (arg.MType != detail::kernel_param_kind_t::kind_accessor)
159  continue;
160 
161  detail::Requirement *AccImpl =
162  static_cast<detail::Requirement *>(arg.MPtr);
163  if (AccImpl->MIsPlaceH) {
164  auto It = std::find(CGData.MRequirements.begin(),
165  CGData.MRequirements.end(), AccImpl);
166  if (It == CGData.MRequirements.end())
167  throw sycl::exception(make_error_code(errc::kernel_argument),
168  "placeholder accessor must be bound by calling "
169  "handler::require() before it can be used.");
170 
171  // Check associated accessors
172  bool AccFound = false;
173  for (detail::ArgDesc &Acc : MAssociatedAccesors) {
174  if (Acc.MType == detail::kernel_param_kind_t::kind_accessor &&
175  static_cast<detail::Requirement *>(Acc.MPtr) == AccImpl) {
176  AccFound = true;
177  break;
178  }
179  }
180 
181  if (!AccFound) {
182  throw sycl::exception(make_error_code(errc::kernel_argument),
183  "placeholder accessor must be bound by calling "
184  "handler::require() before it can be used.");
185  }
186  }
187  }
188  }
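// [Editor's note - illustrative sketch] What the check above enforces, seen
// from the user's side: a placeholder accessor must be bound with
// handler::require() before a kernel may use it (Buf, Q and N are examples):
//
//   sycl::accessor<int, 1, sycl::access_mode::read_write,
//                  sycl::target::device, sycl::access::placeholder::true_t>
//       Acc(Buf); // not bound to any command group yet
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.require(Acc); // without this, finalize() throws errc::kernel_argument
//     CGH.parallel_for(sycl::range<1>{N}, [=](sycl::id<1> I) { Acc[I] += 1; });
//   });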
189 
190  const auto &type = getType();
191  if (type == detail::CG::Kernel) {
192  // If set_specialization_constant was used, build the kernel_bundle.
193  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
194  getOrInsertHandlerKernelBundle(/*Insert=*/false);
195  if (KernelBundleImpPtr) {
196  // Make sure implicit non-interop kernel bundles have the kernel
197  if (!KernelBundleImpPtr->isInterop() &&
198  !MImpl->isStateExplicitKernelBundle()) {
199  auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
200  kernel_id KernelID =
201  detail::ProgramManager::getInstance().getSYCLKernelID(
202  MKernelName.c_str());
203  bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
204  // If the kernel was not inserted and the bundle is in input mode, we try
205  // building it and finding the kernel in executable mode.
206  if (!KernelInserted &&
207  KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
208  auto KernelBundle =
209  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
210  KernelBundleImpPtr);
211  kernel_bundle<bundle_state::executable> ExecKernelBundle =
212  build(KernelBundle);
213  KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
214  setHandlerKernelBundle(KernelBundleImpPtr);
215  KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
216  }
217  // If the kernel was not found in executable mode we throw an exception
218  if (!KernelInserted)
219  throw sycl::exception(make_error_code(errc::runtime),
220  "Failed to add kernel to kernel bundle.");
221  }
222 
223  switch (KernelBundleImpPtr->get_bundle_state()) {
224  case bundle_state::input: {
225  // Underlying level expects kernel_bundle to be in executable state
226  kernel_bundle<bundle_state::executable> ExecBundle = build(
227  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
228  KernelBundleImpPtr));
229  KernelBundleImpPtr = detail::getSyclObjImpl(ExecBundle);
230  setHandlerKernelBundle(KernelBundleImpPtr);
231  break;
232  }
233  case bundle_state::executable:
234  // Nothing to do
235  break;
236  case bundle_state::object:
237  case bundle_state::ext_oneapi_source:
238  assert(0 && "Expected that the bundle is either in input or executable "
239  "states.");
240  break;
241  }
242  }
243 
244  if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() &&
245  !MQueue->is_in_fusion_mode() && !CGData.MRequirements.size() &&
246  !MStreamStorage.size() &&
247  (!CGData.MEvents.size() ||
248  (MQueue->isInOrder() &&
249  detail::Scheduler::areEventsSafeForSchedulerBypass(
250  CGData.MEvents, MQueue->getContextImplPtr())))) {
251  // If the user has not added a new dependency to the dependency graph,
252  // i.e. the graph is unchanged, and the queue is not in fusion mode, this
253  // faster path submits the kernel bypassing the scheduler and avoids
254  // creating CommandGroup and Command objects.
255 
256  std::vector<sycl::detail::pi::PiEvent> RawEvents;
257  detail::EventImplPtr NewEvent;
258 
259 #ifdef XPTI_ENABLE_INSTRUMENTATION
260  // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent,
261  int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
262  auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
263  StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, MNDRDesc,
264  KernelBundleImpPtr, MArgs);
265  auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
266  InstanceID = InstanceID]() {
267 #else
268  auto EnqueueKernel = [&]() {
269 #endif
270  // 'Result' for single point of return
271  pi_int32 Result = PI_ERROR_INVALID_VALUE;
272 #ifdef XPTI_ENABLE_INSTRUMENTATION
273  detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
274  xpti::trace_task_begin, nullptr);
275 #endif
276  if (MQueue->is_host()) {
277  MHostKernel->call(MNDRDesc, (NewEvent)
278  ? NewEvent->getHostProfilingInfo()
279  : nullptr);
280  Result = PI_SUCCESS;
281  } else {
282  if (MQueue->getDeviceImplPtr()->getBackend() ==
283  backend::ext_intel_esimd_emulator) {
284  // Capture the host timestamp for profiling (queue time)
285  if (NewEvent != nullptr)
286  NewEvent->setHostEnqueueTime();
287  [&](auto... Args) {
288  if (MImpl->MKernelIsCooperative) {
289  MQueue->getPlugin()
290  ->call<
291  detail::PiApiKind::piextEnqueueCooperativeKernelLaunch>(
292  Args...);
293  } else {
294  MQueue->getPlugin()
295  ->call<detail::PiApiKind::piEnqueueKernelLaunch>(Args...);
296  }
297  }(/* queue */
298  nullptr,
299  /* kernel */
300  reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
301  /* work_dim */
302  MNDRDesc.Dims,
303  /* global_work_offset */ &MNDRDesc.GlobalOffset[0],
304  /* global_work_size */ &MNDRDesc.GlobalSize[0],
305  /* local_work_size */ &MNDRDesc.LocalSize[0],
306  /* num_events_in_wait_list */ 0,
307  /* event_wait_list */ nullptr,
308  /* event */ nullptr);
309  Result = PI_SUCCESS;
310  } else {
311  Result = enqueueImpKernel(
312  MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel,
313  MKernelName.c_str(), RawEvents, NewEvent, nullptr,
314  MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative);
315  }
316  }
317 #ifdef XPTI_ENABLE_INSTRUMENTATION
318  // Emit signal only when event is created
319  if (NewEvent != nullptr) {
320  detail::emitInstrumentationGeneral(
321  StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
322  static_cast<const void *>(NewEvent->getHandleRef()));
323  }
324  detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
325  xpti::trace_task_end, nullptr);
326 #endif
327  return Result;
328  };
329 
330  bool DiscardEvent = false;
331  if (MQueue->supportsDiscardingPiEvents()) {
332  // A kernel can only use assert if it is not an interop kernel.
333  bool KernelUsesAssert =
334  !(MKernel && MKernel->isInterop()) &&
335  detail::ProgramManager::getInstance().kernelUsesAssert(
336  MKernelName.c_str());
337  DiscardEvent = !KernelUsesAssert;
338  }
339 
340  if (DiscardEvent) {
341  if (PI_SUCCESS != EnqueueKernel())
342  throw runtime_error("Enqueue process failed.",
343  PI_ERROR_INVALID_OPERATION);
344  } else {
345  NewEvent = std::make_shared<detail::event_impl>(MQueue);
346  NewEvent->setWorkerQueue(MQueue);
347  NewEvent->setContextImpl(MQueue->getContextImplPtr());
348  NewEvent->setStateIncomplete();
349  NewEvent->setSubmissionTime();
350 
351  if (PI_SUCCESS != EnqueueKernel())
352  throw runtime_error("Enqueue process failed.",
353  PI_ERROR_INVALID_OPERATION);
354  else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr)
355  NewEvent->setComplete();
356  NewEvent->setEnqueued();
357 
358  MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
359  }
360  return MLastEvent;
361  }
362  }
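// [Editor's note] The fast path above bypasses the scheduler only when the
// submission cannot interact with the dependency graph: no graph, subgraph
// or fusion mode, no accessor requirements, no streams, and either no event
// dependencies or an in-order queue whose dependencies the scheduler may
// safely skip. A hypothetical submission that takes it:
//
//   sycl::queue Q{sycl::property::queue::in_order{}};
//   int *P = sycl::malloc_device<int>(1, Q);
//   Q.submit([&](sycl::handler &CGH) { // USM only: no accessors, no streams
//     CGH.single_task([=] { *P = 1; });
//   });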
363 
364  std::unique_ptr<detail::CG> CommandGroup;
365  switch (type) {
366  case detail::CG::Kernel: {
367  // Copy the kernel name here instead of moving it so that it is still
368  // available after this method runs; the reductions implementation relies
369  // on it, e.g. for the assert feature to check if the kernel uses asserts.
370  CommandGroup.reset(new detail::CGExecKernel(
371  std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
372  std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs),
373  MKernelName.c_str(), std::move(MStreamStorage),
374  std::move(MImpl->MAuxiliaryResources), MCGType,
375  MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, MCodeLoc));
376  break;
377  }
378  case detail::CG::CopyAccToPtr:
379  case detail::CG::CopyPtrToAcc:
380  case detail::CG::CopyAccToAcc:
381  CommandGroup.reset(
382  new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
383  std::move(MImpl->MAuxiliaryResources), MCodeLoc));
384  break;
385  case detail::CG::Fill:
386  CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr,
387  std::move(CGData), MCodeLoc));
388  break;
389  case detail::CG::UpdateHost:
390  CommandGroup.reset(
391  new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc));
392  break;
393  case detail::CG::CopyUSM:
394  CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength,
395  std::move(CGData), MCodeLoc));
396  break;
397  case detail::CG::FillUSM:
398  CommandGroup.reset(new detail::CGFillUSM(
399  std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc));
400  break;
401  case detail::CG::PrefetchUSM:
402  CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength,
403  std::move(CGData), MCodeLoc));
404  break;
405  case detail::CG::AdviseUSM:
406  CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice,
407  std::move(CGData), MCGType,
408  MCodeLoc));
409  break;
410  case detail::CG::Copy2DUSM:
411  CommandGroup.reset(new detail::CGCopy2DUSM(
412  MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
413  MImpl->MHeight, std::move(CGData), MCodeLoc));
414  break;
415  case detail::CG::Fill2DUSM:
416  CommandGroup.reset(new detail::CGFill2DUSM(
417  std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
418  MImpl->MHeight, std::move(CGData), MCodeLoc));
419  break;
420  case detail::CG::Memset2DUSM:
421  CommandGroup.reset(new detail::CGMemset2DUSM(
422  MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
423  std::move(CGData), MCodeLoc));
424  break;
425  case detail::CG::CodeplayHostTask: {
426  auto context = MGraph ? detail::getSyclObjImpl(MGraph->getContext())
427  : MQueue->getContextImplPtr();
428  CommandGroup.reset(new detail::CGHostTask(
429  std::move(MHostTask), MQueue, context, std::move(MArgs),
430  std::move(CGData), MCGType, MCodeLoc));
431  break;
432  }
433  case detail::CG::Barrier:
434  case detail::CG::BarrierWaitlist: {
435  if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) {
436  // if no event to wait for was specified, we add all exit
437  // nodes/events of the graph
438  if (MEventsWaitWithBarrier.size() == 0) {
439  MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
440  // Graph-wide barriers take precedence over previous ones.
441  // We therefore remove the previous ones from ExtraDependencies list.
442  // The current barrier is then added to this list in the graph_impl.
443  std::vector<detail::EventImplPtr> EventsBarriers =
444  GraphImpl->removeBarriersFromExtraDependencies();
445  MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
446  std::begin(EventsBarriers),
447  std::end(EventsBarriers));
448  }
449  CGData.MEvents.insert(std::end(CGData.MEvents),
450  std::begin(MEventsWaitWithBarrier),
451  std::end(MEventsWaitWithBarrier));
452  // A barrier node is implemented as an empty node in the graph,
453  // but we keep the barrier type to help manage dependencies.
454  MCGType = detail::CG::Barrier;
455  CommandGroup.reset(
456  new detail::CG(detail::CG::Barrier, std::move(CGData), MCodeLoc));
457  } else {
458  CommandGroup.reset(
459  new detail::CGBarrier(std::move(MEventsWaitWithBarrier),
460  std::move(CGData), MCGType, MCodeLoc));
461  }
462  break;
463  }
464  case detail::CG::CopyToDeviceGlobal: {
465  CommandGroup.reset(new detail::CGCopyToDeviceGlobal(
466  MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
467  std::move(CGData), MCodeLoc));
468  break;
469  }
470  case detail::CG::CopyFromDeviceGlobal: {
471  CommandGroup.reset(new detail::CGCopyFromDeviceGlobal(
472  MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
473  std::move(CGData), MCodeLoc));
474  break;
475  }
476  case detail::CG::ReadWriteHostPipe: {
477  CommandGroup.reset(new detail::CGReadWriteHostPipe(
478  MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr,
479  MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData),
480  MCodeLoc));
481  break;
482  }
483  case detail::CG::ExecCommandBuffer: {
484  std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph =
485  MQueue ? MQueue->getCommandGraph() : MGraph;
486 
487  // If a parent graph is set that means we are adding or recording a subgraph
488  // and we don't want to actually execute this command graph submission.
489  if (ParentGraph) {
490  ext::oneapi::experimental::detail::graph_impl::WriteLock ParentLock;
491  if (MQueue) {
492  ParentLock = ext::oneapi::experimental::detail::graph_impl::WriteLock(
493  ParentGraph->MMutex);
494  }
495  CGData.MRequirements = MExecGraph->getRequirements();
496  // Here we are using the CommandGroup without passing a CommandBuffer to
497  // pass the exec_graph_impl and event dependencies. Since this subgraph CG
498  // will not be executed this is fine.
499  CommandGroup.reset(new sycl::detail::CGExecCommandBuffer(
500  nullptr, MExecGraph, std::move(CGData)));
501 
502  } else {
503  event GraphCompletionEvent =
504  MExecGraph->enqueue(MQueue, std::move(CGData));
505  MLastEvent = GraphCompletionEvent;
506  return MLastEvent;
507  }
508  } break;
509  case detail::CG::CopyImage:
510  CommandGroup.reset(new detail::CGCopyImage(
511  MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat,
512  MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset,
513  MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc));
514  break;
515  case detail::CG::SemaphoreWait:
516  CommandGroup.reset(new detail::CGSemaphoreWait(
517  MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
518  break;
519  case detail::CG::SemaphoreSignal:
520  CommandGroup.reset(new detail::CGSemaphoreSignal(
521  MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
522  break;
523  case detail::CG::None:
524  if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) {
525  std::cout << "WARNING: An empty command group is submitted." << std::endl;
526  }
527 
528  // Empty nodes are handled by the graph like standard nodes.
529  // In standard (non-graph) mode, empty nodes are not sent to the
530  // scheduler, to save time.
531  if (MGraph || (MQueue && MQueue->getCommandGraph())) {
532  CommandGroup.reset(
533  new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc));
534  } else {
535  detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
536  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
537  return MLastEvent;
538  }
539  break;
540  }
541 
542  if (!CommandGroup)
543  throw sycl::runtime_error(
544  "Internal Error. Command group cannot be constructed.",
545  PI_ERROR_INVALID_OPERATION);
546 
547  // If there is a graph associated with the handler we are in the explicit
548  // graph mode, so we store the CG instead of submitting it to the scheduler,
549  // so it can be retrieved by the graph later.
550  if (MGraph) {
551  MGraphNodeCG = std::move(CommandGroup);
552  return detail::createSyclObjFromImpl<event>(
553  std::make_shared<detail::event_impl>());
554  }
555 
556  // If the queue has an associated graph then we need to take the CG and pass
557  // it to the graph to create a node, rather than submit it to the scheduler.
558  if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
559  auto EventImpl = std::make_shared<detail::event_impl>();
560  std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
561  nullptr;
562 
563  // GraphImpl is read and written in this scope so we lock this graph
564  // with full privileges.
565  ext::oneapi::experimental::detail::graph_impl::WriteLock Lock(
566  GraphImpl->MMutex);
567 
568  ext::oneapi::experimental::node_type NodeType =
569  MImpl->MUserFacingNodeType !=
570  ext::oneapi::experimental::node_type::empty
571  ? MImpl->MUserFacingNodeType
572  : ext::oneapi::experimental::detail::getNodeTypeFromCG(MCGType);
573 
574  // Create a new node in the graph representing this command-group
575  if (MQueue->isInOrder()) {
576  // In-order queues create implicit linear dependencies between nodes.
577  // Find the last node added to the graph from this queue, so our new
578  // node can set it as a predecessor.
579  auto DependentNode = GraphImpl->getLastInorderNode(MQueue);
580 
581  NodeImpl = DependentNode
582  ? GraphImpl->add(NodeType, std::move(CommandGroup),
583  {DependentNode})
584  : GraphImpl->add(NodeType, std::move(CommandGroup));
585 
586  // If we are recording an in-order queue remember the new node, so it
587  // can be used as a dependency for any more nodes recorded from this
588  // queue.
589  GraphImpl->setLastInorderNode(MQueue, NodeImpl);
590  } else {
591  NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
592  }
593 
594  // Associate an event with this new node and return the event.
595  GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl);
596 
597  NodeImpl->MNDRangeUsed = MImpl->MNDRangeUsed;
598 
599  return detail::createSyclObjFromImpl<event>(EventImpl);
600  }
601 
602  detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
603  std::move(CommandGroup), std::move(MQueue));
604 
605  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
606  return MLastEvent;
607 }
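// [Editor's note - illustrative sketch] When finalize() runs while a graph
// is recording an in-order queue, the branch above turns consecutive
// submissions into a linear chain of nodes:
//
//   namespace exp = sycl::ext::oneapi::experimental;
//   exp::command_graph G{Q.get_context(), Q.get_device()};
//   G.begin_recording(Q);
//   Q.single_task([] {}); // node A
//   Q.single_task([] {}); // node B, implicit edge A -> B (in-order queue)
//   G.end_recording(Q);
//   auto Exec = G.finalize(); // executable graph, runnable via
//                             // Q.ext_oneapi_graph(Exec)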
608 
609 void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
610  MImpl->MAuxiliaryResources.push_back(ReduObj);
611 }
612 
613 void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl,
614  int AccTarget) {
615  if (getCommandGraph() &&
616  static_cast<detail::SYCLMemObjT *>(AccImpl->MSYCLMemObj)
617  ->needsWriteBack()) {
618  throw sycl::exception(make_error_code(errc::invalid),
619  "Accessors to buffers which have write_back enabled "
620  "are not allowed to be used in command graphs.");
621  }
622  detail::Requirement *Req = AccImpl.get();
623  if (Req->MAccessMode != sycl::access_mode::read) {
624  auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
625  SYCLMemObj->handleWriteAccessorCreation();
626  }
627  // Add accessor to the list of requirements.
628  if (Req->MAccessRange.size() != 0)
629  CGData.MRequirements.push_back(Req);
630  // Store copy of the accessor.
631  CGData.MAccStorage.push_back(std::move(AccImpl));
632  // Add an accessor to the handler list of associated accessors.
634  // For associated accessors the index is meaningless.
634  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
635  Req, AccTarget, /*index*/ 0);
636 }
637 
638 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
639  access::target AccTarget) {
640  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
641  static_cast<int>(AccTarget));
642 }
643 
644 void handler::associateWithHandler(
645  detail::UnsampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
646  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
647  static_cast<int>(AccTarget));
648 }
649 
650 void handler::associateWithHandler(
651  detail::SampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
652  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
653  static_cast<int>(AccTarget));
654 }
655 
656 static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
657  size_t &IndexShift, int Size,
658  bool IsKernelCreatedFromSource,
659  size_t GlobalSize,
660  std::vector<detail::ArgDesc> &Args,
661  bool isESIMD) {
662  using detail::kernel_param_kind_t;
663  if (AccImpl->PerWI)
664  AccImpl->resize(GlobalSize);
665 
666  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
667  Index + IndexShift);
668 
669  // TODO ESIMD currently does not support offset, memory and access ranges -
670  // accessor::init for ESIMD-mode accessor has a single field, translated
671  // to a single kernel argument set above.
672  if (!isESIMD && !IsKernelCreatedFromSource) {
673  // Dimensionality of the buffer is 1 when dimensionality of the
674  // accessor is 0.
675  const size_t SizeAccField =
676  sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
677  ++IndexShift;
678  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
679  &AccImpl->MAccessRange[0], SizeAccField,
680  Index + IndexShift);
681  ++IndexShift;
682  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
683  &AccImpl->MMemoryRange[0], SizeAccField,
684  Index + IndexShift);
685  ++IndexShift;
686  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
687  &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
688  }
689 }
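// [Editor's note] For a non-ESIMD kernel not created from source, one global
// accessor thus expands to up to four kernel arguments at consecutive
// indices:
//
//   Index + 0: the accessor itself   (kind_accessor)
//   Index + 1: AccImpl->MAccessRange (kind_std_layout, Dims * sizeof(size_t))
//   Index + 2: AccImpl->MMemoryRange (kind_std_layout)
//   Index + 3: AccImpl->MOffset      (kind_std_layout)
//
// IndexShift tracks these extra slots so later arguments stay in order.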
690 
691 void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
692  const int Size, const size_t Index, size_t &IndexShift,
693  bool IsKernelCreatedFromSource, bool IsESIMD) {
694  using detail::kernel_param_kind_t;
695 
696  switch (Kind) {
697  case kernel_param_kind_t::kind_std_layout:
698  case kernel_param_kind_t::kind_pointer: {
699  MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
700  break;
701  }
702  case kernel_param_kind_t::kind_stream: {
703  // Stream contains several accessors inside.
704  stream *S = static_cast<stream *>(Ptr);
705 
706  detail::AccessorBaseHost *GBufBase =
707  static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
708  detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
709  detail::Requirement *GBufReq = GBufImpl.get();
710  addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
711  IsKernelCreatedFromSource,
712  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
713  ++IndexShift;
714  detail::AccessorBaseHost *GOffsetBase =
715  static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
716  detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase);
717  detail::Requirement *GOffsetReq = GOfssetImpl.get();
718  addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
719  IsKernelCreatedFromSource,
720  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
721  ++IndexShift;
722  detail::AccessorBaseHost *GFlushBase =
723  static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
724  detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
725  detail::Requirement *GFlushReq = GFlushImpl.get();
726 
727  size_t GlobalSize = MNDRDesc.GlobalSize.size();
728  // If the work-group size wasn't set explicitly then it must be received
729  // from kernel attribute or set to default values.
730  // For now we can't get this attribute here.
731  // So we just suppose that WG size is always default for stream.
732  // TODO adjust MNDRDesc when device image contains kernel's attribute
733  if (GlobalSize == 0) {
734  // Suppose that work group size is 1 for every dimension
735  GlobalSize = MNDRDesc.NumWorkGroups.size();
736  }
737  addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
738  IsKernelCreatedFromSource, GlobalSize, MArgs,
739  IsESIMD);
740  ++IndexShift;
741  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
742  &S->FlushBufferSize, sizeof(S->FlushBufferSize),
743  Index + IndexShift);
744 
745  break;
746  }
747  case kernel_param_kind_t::kind_accessor: {
748  // For args kind of accessor Size is information about accessor.
749  // The first 11 bits of Size encode the accessor target.
750  const access::target AccTarget =
751  static_cast<access::target>(Size & AccessTargetMask);
752  switch (AccTarget) {
753  case access::target::device:
754  case access::target::constant_buffer: {
755  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
756  addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
757  IsKernelCreatedFromSource,
758  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
759  break;
760  }
761  case access::target::local: {
762  detail::LocalAccessorImplHost *LAcc =
763  static_cast<detail::LocalAccessorImplHost *>(Ptr);
764 
765  range<3> &Size = LAcc->MSize;
766  const int Dims = LAcc->MDims;
767  int SizeInBytes = LAcc->MElemSize;
768  for (int I = 0; I < Dims; ++I)
769  SizeInBytes *= Size[I];
770  // Some backends do not accept zero-sized local memory arguments, so we
771  // make it a minimum allocation of 1 byte.
772  SizeInBytes = std::max(SizeInBytes, 1);
773  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
774  SizeInBytes, Index + IndexShift);
775  // TODO ESIMD currently does not support MSize field passing yet
776  // accessor::init for ESIMD-mode accessor has a single field, translated
777  // to a single kernel argument set above.
778  if (!IsESIMD && !IsKernelCreatedFromSource) {
779  ++IndexShift;
780  const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]);
781  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
782  SizeAccField, Index + IndexShift);
783  ++IndexShift;
784  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
785  SizeAccField, Index + IndexShift);
786  ++IndexShift;
787  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
788  SizeAccField, Index + IndexShift);
789  }
790  break;
791  }
792  case access::target::image:
793  case access::target::image_array: {
794  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
795  MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
796  if (!IsKernelCreatedFromSource) {
797  // TODO Handle additional kernel arguments for image class
798  // if the compiler front-end adds them.
799  }
800  break;
801  }
803  case access::target::host_image:
804  case access::target::host_buffer: {
805  throw sycl::invalid_parameter_error("Unsupported accessor target case.",
806  PI_ERROR_INVALID_OPERATION);
807  break;
808  }
809  }
810  break;
811  }
812  case kernel_param_kind_t::kind_sampler: {
813  MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
814  Index + IndexShift);
815  break;
816  }
817  case kernel_param_kind_t::kind_specialization_constants_buffer: {
818  MArgs.emplace_back(
819  kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
820  Index + IndexShift);
821  break;
822  }
823  case kernel_param_kind_t::kind_invalid:
824  throw runtime_error("Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
825  break;
826  }
827 }
828 
829 // The argument can take up more space to store additional information about
830 // MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
831 // We use the worst-case estimate because the lifetime of the vector is short.
832 // In processArg the kind_stream case introduces the maximum number of
833 // additional arguments. The case adds additional 12 arguments to the currently
834 // processed argument, hence worst-case estimate is 12+1=13.
835 // TODO: the constant can be removed if the size of MArgs will be calculated at
836 // compile time.
837 inline constexpr size_t MaxNumAdditionalArgs = 13;
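// [Editor's worked check] kind_stream is the worst case: the three global
// accessors above each expand to 1 + 3 = 4 arguments (see
// addArgsForGlobalAccessor), plus one kind_std_layout argument for
// FlushBufferSize: 3 * 4 + 1 = 13 arguments for one original argument,
// i.e. 12 additional ones.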
838 
839 void handler::extractArgsAndReqs() {
840  assert(MKernel && "MKernel is not initialized");
841  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
842  MArgs.clear();
843 
844  std::sort(
845  UnPreparedArgs.begin(), UnPreparedArgs.end(),
846  [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
847  return (first.MIndex < second.MIndex);
848  });
849 
850  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
851  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());
852 
853  size_t IndexShift = 0;
854  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
855  void *Ptr = UnPreparedArgs[I].MPtr;
856  const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
857  const int &Size = UnPreparedArgs[I].MSize;
858  const int Index = UnPreparedArgs[I].MIndex;
859  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
860  false);
861  }
862 }
863 
864 void handler::extractArgsAndReqsFromLambda(
865  char *LambdaPtr, size_t KernelArgsNum,
866  const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
867  const bool IsKernelCreatedFromSource = false;
868  size_t IndexShift = 0;
869  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);
870 
871  for (size_t I = 0; I < KernelArgsNum; ++I) {
872  void *Ptr = LambdaPtr + KernelArgs[I].offset;
873  const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
874  const int &Size = KernelArgs[I].info;
875  if (Kind == detail::kernel_param_kind_t::kind_accessor) {
876  // For args kind of accessor Size is information about accessor.
877  // The first 11 bits of Size encode the accessor target.
878  const access::target AccTarget =
879  static_cast<access::target>(Size & AccessTargetMask);
880  if ((AccTarget == access::target::device ||
881  AccTarget == access::target::constant_buffer) ||
882  (AccTarget == access::target::image ||
883  AccTarget == access::target::image_array)) {
884  detail::AccessorBaseHost *AccBase =
885  static_cast<detail::AccessorBaseHost *>(Ptr);
886  Ptr = detail::getSyclObjImpl(*AccBase).get();
887  } else if (AccTarget == access::target::local) {
888  detail::LocalAccessorBaseHost *LocalAccBase =
889  static_cast<detail::LocalAccessorBaseHost *>(Ptr);
890  Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
891  }
892  }
893  processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
894  IsESIMD);
895  }
896 }
897 
898 // Calling methods of kernel_impl requires knowledge of class layout.
899  // As this is impossible in a header, this function calls the necessary
900  // method inside the library and returns the result.
901 detail::string handler::getKernelName() {
902  return detail::string{MKernel->get_info<info::kernel::function_name>()};
903 }
904 
905 void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) {
906  auto UsedKernelBundleImplPtr =
907  getOrInsertHandlerKernelBundle(/*Insert=*/false);
908  if (!UsedKernelBundleImplPtr)
909  return;
910 
911  // Implicit kernel bundles are populated late so we ignore them
912  if (!MImpl->isStateExplicitKernelBundle())
913  return;
914 
915  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
916  device Dev =
917  MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
918  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
919  throw sycl::exception(
920  make_error_code(errc::kernel_not_supported),
921  "The kernel bundle in use does not contain the kernel");
922 }
923 
924 void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
925  throwIfActionIsCreated();
926  MCGType = detail::CG::BarrierWaitlist;
927  MEventsWaitWithBarrier.resize(WaitList.size());
928  std::transform(
929  WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
930  [](const event &Event) { return detail::getSyclObjImpl(Event); });
931 }
932 
933 using namespace sycl::detail;
934 bool handler::DisableRangeRounding() {
935  return SYCLConfig<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING>::get();
936 }
937 
938 bool handler::RangeRoundingTrace() {
939  return SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE>::get();
940 }
941 
942 void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
943  size_t &MinRange) {
944  SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
945  MinFactor, GoodFactor, MinRange);
946 }
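// [Editor's note] These three knobs are read from environment variables via
// SYCLConfig; values below are examples only:
//
//   SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING=1
//   SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1
//   SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:1024  // MinFactor:GoodFactor:MinRange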
947 
948 void handler::memcpy(void *Dest, const void *Src, size_t Count) {
949  throwIfActionIsCreated();
950  MSrcPtr = const_cast<void *>(Src);
951  MDstPtr = Dest;
952  MLength = Count;
953  setType(detail::CG::CopyUSM);
954 }
955 
956 void handler::memset(void *Dest, int Value, size_t Count) {
957  throwIfActionIsCreated();
958  MDstPtr = Dest;
959  MPattern.push_back(static_cast<char>(Value));
960  MLength = Count;
961  setUserFacingNodeType(ext::oneapi::experimental::node_type::memset);
962  setType(detail::CG::FillUSM);
963 }
964 
965 void handler::prefetch(const void *Ptr, size_t Count) {
966  throwIfActionIsCreated();
967  MDstPtr = const_cast<void *>(Ptr);
968  MLength = Count;
969  setType(detail::CG::PrefetchUSM);
970 }
971 
972 void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
973  throwIfActionIsCreated();
974  MDstPtr = const_cast<void *>(Ptr);
975  MLength = Count;
976  MImpl->MAdvice = static_cast<pi_mem_advice>(Advice);
977  setType(detail::CG::AdviseUSM);
978 }
979 
980 void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch,
981  const void *Src, size_t SrcPitch,
982  size_t Width, size_t Height) {
983  // Checks done in callers.
984  MSrcPtr = const_cast<void *>(Src);
985  MDstPtr = Dest;
986  MImpl->MSrcPitch = SrcPitch;
987  MImpl->MDstPitch = DestPitch;
988  MImpl->MWidth = Width;
989  MImpl->MHeight = Height;
990  setType(detail::CG::Copy2DUSM);
991 }
992 
993 void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch,
994  const void *Value, size_t ValueSize,
995  size_t Width, size_t Height) {
996  // Checks done in callers.
997  MDstPtr = Dest;
998  MPattern.resize(ValueSize);
999  std::memcpy(MPattern.data(), Value, ValueSize);
1000  MImpl->MDstPitch = DestPitch;
1001  MImpl->MWidth = Width;
1002  MImpl->MHeight = Height;
1003  setType(detail::CG::Fill2DUSM);
1004 }
1005 
1006 void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
1007  size_t Width, size_t Height) {
1008  // Checks done in callers.
1009  MDstPtr = Dest;
1010  MPattern.push_back(static_cast<char>(Value));
1011  MImpl->MDstPitch = DestPitch;
1012  MImpl->MWidth = Width;
1013  MImpl->MHeight = Height;
1014  setType(detail::CG::Memset2DUSM);
1015 }
1016 
1017 void handler::ext_oneapi_copy(
1018  void *Src, ext::oneapi::experimental::image_mem_handle Dest,
1019  const ext::oneapi::experimental::image_descriptor &Desc) {
1020  throwIfGraphAssociated<
1021  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1022  sycl_ext_oneapi_bindless_images>();
1023  Desc.verify();
1024 
1025  MSrcPtr = Src;
1026  MDstPtr = Dest.raw_handle;
1027 
1028  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1029  PiDesc.image_width = Desc.width;
1030  PiDesc.image_height = Desc.height;
1031  PiDesc.image_depth = Desc.depth;
1032  PiDesc.image_array_size = Desc.array_size;
1033 
1034  if (Desc.array_size > 1) {
1035  // Image Array.
1036  PiDesc.image_type =
1037  Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY;
1038 
1039  // Cubemap.
1040  PiDesc.image_type =
1041  Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1042  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1043  : PiDesc.image_type;
1044  } else {
1045  PiDesc.image_type =
1046  Desc.depth > 0
1047  ? PI_MEM_TYPE_IMAGE3D
1048  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
1049  }
1050 
1051  sycl::detail::pi::PiMemImageFormat PiFormat;
1052  PiFormat.image_channel_data_type =
1053  detail::convertChannelType(Desc.channel_type);
1054  PiFormat.image_channel_order =
1055  detail::convertChannelOrder(Desc.channel_order);
1056 
1057  MImpl->MSrcOffset = {0, 0, 0};
1058  MImpl->MDestOffset = {0, 0, 0};
1059  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
1060  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
1061  MImpl->MImageDesc = PiDesc;
1062  MImpl->MImageFormat = PiFormat;
1063  MImpl->MImageCopyFlags =
1064  sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
1065  setType(detail::CG::CopyImage);
1066 }
1067 
1068 void handler::ext_oneapi_copy(
1069  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1070  ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset,
1071  const ext::oneapi::experimental::image_descriptor &DestImgDesc,
1072  sycl::range<3> CopyExtent) {
1073  throwIfGraphAssociated<
1074  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1075  sycl_ext_oneapi_bindless_images>();
1076  DestImgDesc.verify();
1077 
1078  MSrcPtr = Src;
1079  MDstPtr = Dest.raw_handle;
1080 
1081  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1082  PiDesc.image_width = DestImgDesc.width;
1083  PiDesc.image_height = DestImgDesc.height;
1084  PiDesc.image_depth = DestImgDesc.depth;
1085  PiDesc.image_array_size = DestImgDesc.array_size;
1086 
1087  if (DestImgDesc.array_size > 1) {
1088  // Image Array.
1089  PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
1090  : PI_MEM_TYPE_IMAGE1D_ARRAY;
1091 
1092  // Cubemap.
1093  PiDesc.image_type =
1094  DestImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1095  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1096  : PiDesc.image_type;
1097  } else {
1098  PiDesc.image_type = DestImgDesc.depth > 0
1099  ? PI_MEM_TYPE_IMAGE3D
1100  : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1101  : PI_MEM_TYPE_IMAGE1D);
1102  }
1103 
1104  sycl::detail::pi::PiMemImageFormat PiFormat;
1105  PiFormat.image_channel_data_type =
1106  detail::convertChannelType(DestImgDesc.channel_type);
1107  PiFormat.image_channel_order =
1108  detail::convertChannelOrder(DestImgDesc.channel_order);
1109 
1110  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1111  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1112  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1113  MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]};
1114  MImpl->MImageDesc = PiDesc;
1115  MImpl->MImageFormat = PiFormat;
1116  MImpl->MImageCopyFlags =
1117  sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
1118  setType(detail::CG::CopyImage);
1119 }
1120 
1121 void handler::ext_oneapi_copy(
1122  ext::oneapi::experimental::image_mem_handle Src, void *Dest,
1123  const ext::oneapi::experimental::image_descriptor &Desc) {
1124  throwIfGraphAssociated<
1125  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1126  sycl_ext_oneapi_bindless_images>();
1127  Desc.verify();
1128 
1129  MSrcPtr = Src.raw_handle;
1130  MDstPtr = Dest;
1131 
1132  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1133  PiDesc.image_width = Desc.width;
1134  PiDesc.image_height = Desc.height;
1135  PiDesc.image_depth = Desc.depth;
1136  PiDesc.image_array_size = Desc.array_size;
1137 
1138  if (Desc.array_size > 1) {
1139  // Image Array.
1140  PiDesc.image_type =
1141  Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY;
1142 
1143  // Cubemap.
1144  PiDesc.image_type =
1145  Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1146  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1147  : PiDesc.image_type;
1148  } else {
1149  PiDesc.image_type =
1150  Desc.depth > 0
1151  ? PI_MEM_TYPE_IMAGE3D
1152  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
1153  }
1154 
1155  sycl::detail::pi::PiMemImageFormat PiFormat;
1156  PiFormat.image_channel_data_type =
1157  detail::convertChannelType(Desc.channel_type);
1158  PiFormat.image_channel_order =
1159  detail::convertChannelOrder(Desc.channel_order);
1160 
1161  MImpl->MSrcOffset = {0, 0, 0};
1162  MImpl->MDestOffset = {0, 0, 0};
1163  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
1164  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
1165  MImpl->MImageDesc = PiDesc;
1166  MImpl->MImageFormat = PiFormat;
1167  MImpl->MImageCopyFlags =
1168  sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
1169  setType(detail::CG::CopyImage);
1170 }
1171 
1172 void handler::ext_oneapi_copy(
1173  ext::oneapi::experimental::image_mem_handle Src,
1174  ext::oneapi::experimental::image_mem_handle Dest,
1175  const ext::oneapi::experimental::image_descriptor &ImageDesc) {
1176  throwIfGraphAssociated<
1177  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1178  sycl_ext_oneapi_bindless_images>();
1179  ImageDesc.verify();
1180 
1181  MSrcPtr = Src.raw_handle;
1182  MDstPtr = Dest.raw_handle;
1183 
1184  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1185  PiDesc.image_width = ImageDesc.width;
1186  PiDesc.image_height = ImageDesc.height;
1187  PiDesc.image_depth = ImageDesc.depth;
1188  PiDesc.image_array_size = ImageDesc.array_size;
1189  if (ImageDesc.array_size > 1) {
1190  // Image Array.
1191  PiDesc.image_type = ImageDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
1192  : PI_MEM_TYPE_IMAGE1D_ARRAY;
1193 
1194  // Cubemap.
1195  PiDesc.image_type =
1196  ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1197  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1198  : PiDesc.image_type;
1199  } else {
1200  PiDesc.image_type = ImageDesc.depth > 0
1201  ? PI_MEM_TYPE_IMAGE3D
1202  : (ImageDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1203  : PI_MEM_TYPE_IMAGE1D);
1204  }
1205 
1206  sycl::detail::pi::PiMemImageFormat PiFormat;
1207  PiFormat.image_channel_data_type =
1208  detail::convertChannelType(ImageDesc.channel_type);
1209  PiFormat.image_channel_order =
1210  detail::convertChannelOrder(ImageDesc.channel_order);
1211 
1212  MImpl->MSrcOffset = {0, 0, 0};
1213  MImpl->MDestOffset = {0, 0, 0};
1214  MImpl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
1215  MImpl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
1216  MImpl->MImageDesc = PiDesc;
1217  MImpl->MImageFormat = PiFormat;
1218  MImpl->MImageCopyFlags =
1219  sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE;
1220  setType(detail::CG::CopyImage);
1221 }
1222 
1223 void handler::ext_oneapi_copy(
1224  ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset,
1225  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1226  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1227  sycl::range<3> CopyExtent) {
1228  throwIfGraphAssociated<
1229  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1230  sycl_ext_oneapi_bindless_images>();
1231  SrcImgDesc.verify();
1232 
1233  MSrcPtr = Src.raw_handle;
1234  MDstPtr = Dest;
1235 
1236  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1237  PiDesc.image_width = SrcImgDesc.width;
1238  PiDesc.image_height = SrcImgDesc.height;
1239  PiDesc.image_depth = SrcImgDesc.depth;
1240  PiDesc.image_array_size = SrcImgDesc.array_size;
1241 
1242  if (SrcImgDesc.array_size > 1) {
1243  // Image Array.
1244  PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
1245  : PI_MEM_TYPE_IMAGE1D_ARRAY;
1246 
1247  // Cubemap.
1248  PiDesc.image_type =
1249  SrcImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1250  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1251  : PiDesc.image_type;
1252  } else {
1253  PiDesc.image_type = SrcImgDesc.depth > 0
1254  ? PI_MEM_TYPE_IMAGE3D
1255  : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1256  : PI_MEM_TYPE_IMAGE1D);
1257  }
1258 
1259  sycl::detail::pi::PiMemImageFormat PiFormat;
1260  PiFormat.image_channel_data_type =
1261  detail::convertChannelType(SrcImgDesc.channel_type);
1262  PiFormat.image_channel_order =
1263  detail::convertChannelOrder(SrcImgDesc.channel_order);
1264 
1265  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1266  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1267  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1268  MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]};
1269  MImpl->MImageDesc = PiDesc;
1270  MImpl->MImageFormat = PiFormat;
1271  MImpl->MImageCopyFlags =
1272  sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
1273  setType(detail::CG::CopyImage);
1274 }
1275 
1276 void handler::ext_oneapi_copy(
1277  void *Src, void *Dest,
1278  const ext::oneapi::experimental::image_descriptor &Desc, size_t Pitch) {
1279  throwIfGraphAssociated<
1280  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1281  sycl_ext_oneapi_bindless_images>();
1282  Desc.verify();
1283 
1284  MSrcPtr = Src;
1285  MDstPtr = Dest;
1286 
1287  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1288  PiDesc.image_width = Desc.width;
1289  PiDesc.image_height = Desc.height;
1290  PiDesc.image_depth = Desc.depth;
1291  PiDesc.image_array_size = Desc.array_size;
1292 
1293  if (Desc.array_size > 1) {
1294  // Image Array.
1295  PiDesc.image_type =
1296  Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY;
1297 
1298  // Cubemap.
1299  PiDesc.image_type =
1300  Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
1301  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1302  : PiDesc.image_type;
1303  } else {
1304  PiDesc.image_type =
1305  Desc.depth > 0
1306  ? PI_MEM_TYPE_IMAGE3D
1307  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
1308  }
1309 
1310  sycl::detail::pi::PiMemImageFormat PiFormat;
1311  PiFormat.image_channel_data_type =
1312  detail::convertChannelType(Desc.channel_type);
1313  PiFormat.image_channel_order =
1314  detail::convertChannelOrder(Desc.channel_order);
1315 
1316  MImpl->MSrcOffset = {0, 0, 0};
1317  MImpl->MDestOffset = {0, 0, 0};
1318  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
1319  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
1320  MImpl->MImageDesc = PiDesc;
1321  MImpl->MImageDesc.image_row_pitch = Pitch;
1322  MImpl->MImageFormat = PiFormat;
1323  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
1324  get_pointer_type(Src, MQueue->get_context()),
1325  get_pointer_type(Dest, MQueue->get_context()));
1326  setType(detail::CG::CopyImage);
1327 }
1328 
1329 void handler::ext_oneapi_copy(
1330  void *Src, sycl::range<3> SrcOffset, void *Dest, sycl::range<3> DestOffset,
1331  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1332  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1333  sycl::range<3> CopyExtent) {
1334  throwIfGraphAssociated<
1335  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1336  sycl_ext_oneapi_bindless_images>();
1337  DeviceImgDesc.verify();
1338 
1339  MSrcPtr = Src;
1340  MDstPtr = Dest;
1341 
1342  sycl::detail::pi::PiMemImageDesc PiDesc = {};
1343  PiDesc.image_width = DeviceImgDesc.width;
1344  PiDesc.image_height = DeviceImgDesc.height;
1345  PiDesc.image_depth = DeviceImgDesc.depth;
1346  PiDesc.image_array_size = DeviceImgDesc.array_size;
1347 
1348  if (DeviceImgDesc.array_size > 1) {
1349  // Image Array.
1350  PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
1351  : PI_MEM_TYPE_IMAGE1D_ARRAY;
1352 
1353  // Cubemap.
1354  PiDesc.image_type =
1355  DeviceImgDesc.type ==
1356  sycl::ext::oneapi::experimental::image_type::cubemap
1357  ? PI_MEM_TYPE_IMAGE_CUBEMAP
1358  : PiDesc.image_type;
1359  } else {
1360  PiDesc.image_type = DeviceImgDesc.depth > 0
1361  ? PI_MEM_TYPE_IMAGE3D
1362  : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1363  : PI_MEM_TYPE_IMAGE1D);
1364  }
1365 
1366  sycl::detail::pi::PiMemImageFormat PiFormat;
1367  PiFormat.image_channel_data_type =
1368  detail::convertChannelType(DeviceImgDesc.channel_type);
1369  PiFormat.image_channel_order =
1370  detail::convertChannelOrder(DeviceImgDesc.channel_order);
1371 
1372  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1373  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1374  MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]};
1375  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1376  MImpl->MImageDesc = PiDesc;
1377  MImpl->MImageDesc.image_row_pitch = DeviceRowPitch;
1378  MImpl->MImageFormat = PiFormat;
1379  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
1380  get_pointer_type(Src, MQueue->get_context()),
1381  get_pointer_type(Dest, MQueue->get_context()));
1382  setType(detail::CG::CopyImage);
1383 }
1384 
1385 void handler::ext_oneapi_wait_external_semaphore(
1386  sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) {
1387  throwIfGraphAssociated<
1388  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1389  sycl_ext_oneapi_bindless_images>();
1390  MImpl->MInteropSemaphoreHandle =
1391  (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle;
1392  setType(detail::CG::SemaphoreWait);
1393 }
1394 
1395 void handler::ext_oneapi_signal_external_semaphore(
1396  sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) {
1397  throwIfGraphAssociated<
1398  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1399  sycl_ext_oneapi_bindless_images>();
1400  MImpl->MInteropSemaphoreHandle =
1401  (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle;
1402  setType(detail::CG::SemaphoreSignal);
1403 }
1404 
1405 void handler::use_kernel_bundle(
1406  const kernel_bundle<bundle_state::executable> &ExecBundle) {
1407  std::shared_ptr<detail::queue_impl> PrimaryQueue =
1408  MImpl->MSubmissionPrimaryQueue;
1409  if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) ||
1410  (MGraph && (MGraph->getContext() != ExecBundle.get_context())))
1411  throw sycl::exception(
1412  make_error_code(errc::invalid),
1413  "Context associated with the primary queue is different from the "
1414  "context associated with the kernel bundle");
1415 
1416  std::shared_ptr<detail::queue_impl> SecondaryQueue =
1417  MImpl->MSubmissionSecondaryQueue;
1418  if (SecondaryQueue &&
1419  SecondaryQueue->get_context() != ExecBundle.get_context())
1420  throw sycl::exception(
1421  make_error_code(errc::invalid),
1422  "Context associated with the secondary queue is different from the "
1423  "context associated with the kernel bundle");
1424 
1425  setStateExplicitKernelBundle();
1426  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
1427 }
1428 
1429 void handler::depends_on(event Event) {
1430  auto EventImpl = detail::getSyclObjImpl(Event);
1431  depends_on(EventImpl);
1432 }
1433 
1434 void handler::depends_on(const std::vector<event> &Events) {
1435  for (const event &Event : Events) {
1436  depends_on(Event);
1437  }
1438 }
1439 
1440 void handler::depends_on(EventImplPtr EventImpl) {
1441  if (!EventImpl)
1442  return;
1443  if (EventImpl->isDiscarded()) {
1444  throw sycl::exception(make_error_code(errc::invalid),
1445  "Queue operation cannot depend on discarded event.");
1446  }
1447  if (auto Graph = getCommandGraph(); Graph) {
1448  auto EventGraph = EventImpl->getCommandGraph();
1449  if (EventGraph == nullptr) {
1450  throw sycl::exception(
1451  make_error_code(errc::invalid),
1452  "Graph nodes cannot depend on events from outside the graph.");
1453  }
1454  if (EventGraph != Graph) {
1455  throw sycl::exception(
1456  make_error_code(errc::invalid),
1457  "Graph nodes cannot depend on events from another graph.");
1458  }
1459  }
1460  CGData.MEvents.push_back(EventImpl);
1461 }
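// [Editor's note - illustrative sketch] While a graph is recording, the
// checks above only accept events produced by the same graph:
//
//   G.begin_recording(Q);
//   sycl::event E1 = Q.single_task([] {}); // event belongs to this graph
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.depends_on(E1);                      // OK: same graph
//     // CGH.depends_on(EventFromOtherQueue);  // throws errc::invalid
//     CGH.single_task([] {});
//   });
//   G.end_recording(Q);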
1462 
1463 void handler::depends_on(const std::vector<detail::EventImplPtr> &Events) {
1464  for (const EventImplPtr &Event : Events) {
1465  depends_on(Event);
1466  }
1467 }
1468 
1469 static bool
1470 checkContextSupports(const std::shared_ptr<detail::context_impl> &ContextImpl,
1471  sycl::detail::pi::PiContextInfo InfoQuery) {
1472  auto &Plugin = ContextImpl->getPlugin();
1473  pi_bool SupportsOp = false;
1474  Plugin->call<detail::PiApiKind::piContextGetInfo>(ContextImpl->getHandleRef(),
1475  InfoQuery, sizeof(pi_bool),
1476  &SupportsOp, nullptr);
1477  return SupportsOp;
1478 }
1479 
1480 void handler::verifyDeviceHasProgressGuarantee(
1481  sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee,
1482  sycl::ext::oneapi::experimental::execution_scope threadScope,
1483  sycl::ext::oneapi::experimental::execution_scope coordinationScope) {
1484  using execution_scope = sycl::ext::oneapi::experimental::execution_scope;
1485  using forward_progress =
1486  sycl::ext::oneapi::experimental::forward_progress_guarantee;
1487  auto deviceImplPtr = MQueue->getDeviceImplPtr();
1488  const bool supported = deviceImplPtr->supportsForwardProgress(
1489  guarantee, threadScope, coordinationScope);
1490  if (threadScope == execution_scope::work_group) {
1491  if (!supported) {
1492  throw sycl::exception(
1493  sycl::errc::feature_not_supported,
1494  "Required progress guarantee for work groups is not "
1495  "supported by this device.");
1496  }
1497  // If we are here, the device supports the guarantee required but there is a
1498  // caveat in that if the guarantee required is a concurrent guarantee, then
1499  // we most likely also need to enable cooperative launch of the kernel. That
1500  // is, although the device supports the required guarantee, some setup work
1501  // is needed to truly make the device provide that guarantee at runtime.
1502  // Otherwise, we will get the default guarantee which is weaker than
1503  // concurrent. Same reasoning applies for sub_group but not for work_item.
1504  // TODO: Further design work is probably needed to reflect this behavior in
1505  // Unified Runtime.
1506  if (guarantee == forward_progress::concurrent)
1507  setKernelIsCooperative(true);
1508  } else if (threadScope == execution_scope::sub_group) {
1509  if (!supported) {
1510  throw sycl::exception(sycl::errc::feature_not_supported,
1511  "Required progress guarantee for sub groups is not "
1512  "supported by this device.");
1513  }
1514  // Same reasoning as above.
1515  if (guarantee == forward_progress::concurrent)
1516  setKernelIsCooperative(true);
1517  } else { // threadScope is execution_scope::work_item; otherwise the
1518  // behavior is undefined
1519  if (!supported) {
1520  throw sycl::exception(sycl::errc::feature_not_supported,
1521  "Required progress guarantee for work items is not "
1522  "supported by this device.");
1523  }
1524  }
1525 }
1526 
1527 bool handler::supportsUSMMemcpy2D() {
1528  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1529  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1530  if (QueueImpl &&
1531  !checkContextSupports(QueueImpl->getContextImplPtr(),
1532  PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT))
1533  return false;
1534  }
1535  return true;
1536 }
1537 
1538 bool handler::supportsUSMFill2D() {
1539  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1540  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1541  if (QueueImpl &&
1542  !checkContextSupports(QueueImpl->getContextImplPtr(),
1543  PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT))
1544  return false;
1545  }
1546  return true;
1547 }
1548 
1549 bool handler::supportsUSMMemset2D() {
1550  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1551  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1552  if (QueueImpl &&
1553  !checkContextSupports(QueueImpl->getContextImplPtr(),
1554  PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT))
1555  return false;
1556  }
1557  return true;
1558 }
1559 
1560 id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) {
1561  device Dev = MQueue->get_device();
1562  range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
1563  Dev.get_info<info::device::max_compute_units>();
1564  return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
1565 }
1566 
1567 void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr,
1568  size_t Size, bool Block) {
1569  MImpl->HostPipeName = Name.data();
1570  MImpl->HostPipePtr = Ptr;
1571  MImpl->HostPipeTypeSize = Size;
1572  MImpl->HostPipeBlocking = Block;
1573  MImpl->HostPipeRead = 1;
1574  setType(detail::CG::ReadWriteHostPipe);
1575 }
1576 
1577 void handler::ext_intel_write_host_pipe(detail::string_view Name, void *Ptr,
1578  size_t Size, bool Block) {
1579  MImpl->HostPipeName = Name.data();
1580  MImpl->HostPipePtr = Ptr;
1581  MImpl->HostPipeTypeSize = Size;
1582  MImpl->HostPipeBlocking = Block;
1583  MImpl->HostPipeRead = 0;
1584  setType(detail::CG::ReadWriteHostPipe);
1585 }
1586 
1587 void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src,
1588  bool IsDeviceImageScoped, size_t NumBytes,
1589  size_t Offset) {
1590  throwIfActionIsCreated();
1591  MSrcPtr = const_cast<void *>(Src);
1592  MDstPtr = const_cast<void *>(DeviceGlobalPtr);
1593  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1594  MLength = NumBytes;
1595  MImpl->MOffset = Offset;
1596  setType(detail::CG::CopyToDeviceGlobal);
1597 }
1598 
1599 void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
1600  bool IsDeviceImageScoped, size_t NumBytes,
1601  size_t Offset) {
1602  throwIfActionIsCreated();
1603  MSrcPtr = const_cast<void *>(DeviceGlobalPtr);
1604  MDstPtr = Dest;
1605  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1606  MLength = NumBytes;
1607  MImpl->MOffset = Offset;
1608  setType(detail::CG::CopyFromDeviceGlobal);
1609 }
1610 
1611 void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr,
1612  const void *Src,
1613  size_t DeviceGlobalTSize,
1614  bool IsDeviceImageScoped,
1615  size_t NumBytes, size_t Offset) {
1616  std::weak_ptr<detail::context_impl> WeakContextImpl =
1617  MQueue->getContextImplPtr();
1618  std::weak_ptr<detail::device_impl> WeakDeviceImpl =
1619  MQueue->getDeviceImplPtr();
1620  host_task([=] {
1621  // Capture context and device as weak to avoid keeping them alive for too
1622  // long. If they are dead by the time this executes, the operation would not
1623  // have been visible anyway.
1624  std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
1625  std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
1626  if (ContextImpl && DeviceImpl)
1627  ContextImpl->memcpyToHostOnlyDeviceGlobal(
1628  DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
1629  IsDeviceImageScoped, NumBytes, Offset);
1630  });
1631 }
1632 
1633 void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest,
1634  const void *DeviceGlobalPtr,
1635  bool IsDeviceImageScoped,
1636  size_t NumBytes, size_t Offset) {
1637  const std::shared_ptr<detail::context_impl> &ContextImpl =
1638  MQueue->getContextImplPtr();
1639  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1640  MQueue->getDeviceImplPtr();
1641  host_task([=] {
1642  // Unlike memcpy to device_global, we need to keep the context and device
1643  // alive in the capture of this operation as we must be able to correctly
1644  // copy the value to the user-specified pointer.
1645  ContextImpl->memcpyFromHostOnlyDeviceGlobal(
1646  DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
1647  Offset);
1648  });
1649 }
1650 
1651 const std::shared_ptr<detail::context_impl> &
1652 handler::getContextImplPtr() const {
1653  return MQueue->getContextImplPtr();
1654 }
1655 
1656 void handler::setKernelCacheConfig(
1657  sycl::detail::pi::PiKernelCacheConfig Config) {
1658  MImpl->MKernelCacheConfig = Config;
1659 }
1660 
1661 void handler::setKernelIsCooperative(bool KernelIsCooperative) {
1662  MImpl->MKernelIsCooperative = KernelIsCooperative;
1663 }
1664 
1665 void handler::ext_oneapi_graph(
1666  ext::oneapi::experimental::command_graph<
1667  ext::oneapi::experimental::graph_state::executable>
1668  Graph) {
1669  MCGType = detail::CG::ExecCommandBuffer;
1670  MExecGraph = detail::getSyclObjImpl(Graph);
1671 }
1672 
1673 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1674 handler::getCommandGraph() const {
1675  if (MGraph) {
1676  return MGraph;
1677  }
1678  return MQueue->getCommandGraph();
1679 }
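An explicitly constructed graph (set in the handler constructor used by the explicit add API) takes precedence; otherwise the queue is consulted, and it only returns a graph while recording is active. A short sketch of the record path, with Q as an illustrative queue:

  namespace syclex = sycl::ext::oneapi::experimental;

  syclex::command_graph Graph{Q.get_context(), Q.get_device()};
  Graph.begin_recording(Q);  // MQueue->getCommandGraph() is now non-null
  Q.submit([&](sycl::handler &CGH) { CGH.single_task([] {}); });
  Graph.end_recording(Q);
  auto Exec = Graph.finalize();  // executable graph, run via ext_oneapi_graph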
1680 
1681 void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) {
1682  MImpl->MUserFacingNodeType = Type;
1683 }
1684 
1685 std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
1686  auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this));
1687  std::array<size_t, 3> PiResult = {};
1688  auto Ret = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1689  Dev->getHandleRef(),
1690  PiInfoCode<
1691  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1692  sizeof(PiResult), &PiResult, nullptr);
1693  if (Ret == PI_SUCCESS) {
1694  return PiResult;
1695  }
1696  return {};
1697 }
1698 
1699 std::tuple<std::array<size_t, 3>, bool> handler::getMaxWorkGroups_v2() {
1700  auto ImmRes = getMaxWorkGroups();
1701  if (ImmRes)
1702  return {*ImmRes, true};
1703  return {std::array<size_t, 3>{0, 0, 0}, false};
1704 }
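Both variants wrap the same piDeviceGetInfo query; the _v2 form folds the unsupported case into a {0, 0, 0}/false result instead of an empty optional. A sketch of the equivalent user-level query, assuming the descriptor used above is exposed through device::get_info as in the launch-queries extension (Q is illustrative):

  namespace exp_info = sycl::ext::oneapi::experimental::info;

  sycl::device Dev = Q.get_device();
  // Element i is the maximum number of work-groups along dimension i.
  auto MaxWG = Dev.get_info<exp_info::device::max_work_groups<3>>();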
1705 
1706 void handler::setNDRangeUsed(bool Value) { MImpl->MNDRangeUsed = Value; }
1707 
1708 void handler::registerDynamicParameter(
1709  ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase,
1710  int ArgIndex) {
1711  if (MQueue && MQueue->getCommandGraph()) {
1712  throw sycl::exception(
1713  make_error_code(errc::invalid),
1714  "Dynamic Parameters cannot be used with Graph Queue recording.");
1715  }
1716  if (!MGraph) {
1717  throw sycl::exception(
1718  make_error_code(errc::invalid),
1719  "Dynamic Parameters cannot be used with normal SYCL submissions");
1720  }
1721 
1722  auto ParamImpl = detail::getSyclObjImpl(DynamicParamBase);
1723  if (ParamImpl->MGraph != this->MGraph) {
1724  throw sycl::exception(
1725  make_error_code(errc::invalid),
1726  "Cannot use a Dynamic Parameter with a node associated with a graph "
1727  "other than the one it was created with.");
1728  }
1729  MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex);
1730 }
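Registration is therefore only legal from a command group added explicitly to the same modifiable graph that owns the parameter. A hedged usage sketch following the sycl_ext_oneapi_graph extension (Ctx, Dev, PtrA, PtrB, N, and BuiltKernel, a prebuilt sycl::kernel, are illustrative assumptions):

  namespace syclex = sycl::ext::oneapi::experimental;

  syclex::command_graph Graph{Ctx, Dev};
  syclex::dynamic_parameter DynParam(Graph, PtrA);  // initial value: PtrA
  auto Node = Graph.add([&](sycl::handler &CGH) {
    CGH.set_arg(0, DynParam);  // lands in registerDynamicParameter above
    CGH.parallel_for(sycl::range{N}, BuiltKernel);
  });
  auto Exec = Graph.finalize(syclex::property::graph::updatable{});
  DynParam.update(PtrB);  // retarget argument 0
  Exec.update(Node);      // push the change to the executable graph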
1731 } // namespace _V1
1732 } // namespace sycl