DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.cpp
Go to the documentation of this file.
1 //==-------- handler.cpp --- SYCL command group handler --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <algorithm>
10 
11 #include <detail/config.hpp>
13 #include <detail/graph_impl.hpp>
14 #include <detail/handler_impl.hpp>
15 #include <detail/image_impl.hpp>
17 #include <detail/kernel_impl.hpp>
18 #include <detail/queue_impl.hpp>
21 #include <detail/usm/usm_impl.hpp>
22 #include <sycl/detail/common.hpp>
23 #include <sycl/detail/helpers.hpp>
25 #include <sycl/detail/pi.h>
26 #include <sycl/detail/pi.hpp>
27 #include <sycl/event.hpp>
28 #include <sycl/handler.hpp>
29 #include <sycl/info/info_desc.hpp>
30 #include <sycl/stream.hpp>
31 
32 namespace sycl {
33 inline namespace _V1 {
34 
35 namespace detail {
36 
// Returns true if the given device-global pointer is referenced by at least
// one device image, i.e. it is actually used by some kernel.
// NOTE(review): the initializer of DGEntry is truncated in this listing
// (original line 39 is missing) — presumably a lookup that maps
// DeviceGlobalPtr to its DeviceGlobalMapEntry; confirm against the full
// source.
37 bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) {
 38  DeviceGlobalMapEntry *DGEntry =
 40  DeviceGlobalPtr);
 // Used iff an entry exists and it is referenced by at least one image.
 41  return DGEntry && !DGEntry->MImageIdentifiers.empty();
 42 }
43 
// Validates the USM allocation kinds of the source and destination pointers
// of a copy and throws on unsupported combinations (e.g. host-to-host image
// copy, or a pointer whose location cannot be classified).
// NOTE(review): this fragment is heavily truncated — the function signature
// (original lines 44-45) and the bodies of several branches / throw
// statements (original lines 49, 52-53, 59-60, 63, 65, 68) are missing from
// this listing. Only the error-message strings and the branch conditions
// survive; do not infer exact behavior from this fragment.
 46  if (DstPtrType == sycl::usm::alloc::device) {
 47  // Dest is on device
 48  if (SrcPtrType == sycl::usm::alloc::device)
 50  if (SrcPtrType == sycl::usm::alloc::host ||
 51  SrcPtrType == sycl::usm::alloc::unknown)
 54  "Unknown copy source location");
 55  }
 56  if (DstPtrType == sycl::usm::alloc::host ||
 57  DstPtrType == sycl::usm::alloc::unknown) {
 58  // Dest is on host
 59  if (SrcPtrType == sycl::usm::alloc::device)
 61  if (SrcPtrType == sycl::usm::alloc::host ||
 62  SrcPtrType == sycl::usm::alloc::unknown)
 64  "Cannot copy image from host to host");
 66  "Unknown copy source location");
 67  }
 69  "Unknown copy destination location");
 70 }
71 
72 } // namespace detail
73 
// Convenience constructor: delegates to the four-argument constructor,
// using Queue as both the submission queue and the primary queue, with no
// secondary (fallback) queue.
74 handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
75  : handler(Queue, Queue, nullptr, IsHost) {}
76 
// Main constructor: stores the primary/secondary queues inside a fresh
// handler_impl and keeps Queue as the queue this command group will be
// submitted to. IsHost is recorded in MIsHost and flags a host-side handler.
77 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
78  std::shared_ptr<detail::queue_impl> PrimaryQueue,
79  std::shared_ptr<detail::queue_impl> SecondaryQueue,
80  bool IsHost)
81  : MImpl(std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
82  std::move(SecondaryQueue))),
83  MQueue(std::move(Queue)), MIsHost(IsHost) {}
84 
85 handler::handler(
86  std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
87  : MImpl(std::make_shared<detail::handler_impl>()), MGraph(Graph) {}
88 
// Sets the submission state to indicate that an explicit kernel bundle has
// been set on this handler. The underlying handler_impl throws a
// sycl::exception with errc::invalid if the current state indicates that a
// specialization constant has already been set (the two are mutually
// exclusive).
92 void handler::setStateExplicitKernelBundle() {
93  MImpl->setStateExplicitKernelBundle();
94 }
95 
// Sets the submission state to indicate that a specialization constant has
// been set. The underlying handler_impl throws a sycl::exception with
// errc::invalid if the current state indicates that an explicit kernel
// bundle has already been set.
99 void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }
100 
// Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
// false otherwise. Pure query; delegates to handler_impl.
103 bool handler::isStateExplicitKernelBundle() const {
104  return MImpl->isStateExplicitKernelBundle();
105 }
106 
107 // Returns a shared_ptr to the kernel_bundle.
108 // If there is no kernel_bundle created:
109 // returns newly created kernel_bundle if Insert is true
110 // returns shared_ptr(nullptr) if Insert is false
111 std::shared_ptr<detail::kernel_bundle_impl>
112 handler::getOrInsertHandlerKernelBundle(bool Insert) const {
113  if (!MImpl->MKernelBundle && Insert) {
114  auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
115  auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
116  MImpl->MKernelBundle = detail::getSyclObjImpl(
117  get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
118  }
119  return MImpl->MKernelBundle;
120 }
121 
// Replaces the handler's cached kernel bundle with the provided
// implementation pointer.
123 void handler::setHandlerKernelBundle(
124  const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
125  MImpl->MKernelBundle = NewKernelBundleImpPtr;
126 }
127 
128 void handler::setHandlerKernelBundle(kernel Kernel) {
129  // Kernel may not have an associated kernel bundle if it is created from a
130  // program. As such, apply getSyclObjImpl directly on the kernel, i.e. not
131  // the other way around: getSyclObjImp(Kernel->get_kernel_bundle()).
132  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
133  detail::getSyclObjImpl(Kernel)->get_kernel_bundle();
134  setHandlerKernelBundle(KernelBundleImpl);
135 }
136 
// Finalizes the command group and returns the event representing the
// submitted work. In order it: (1) returns the cached event if already
// finalized, (2) returns the subgraph node's event when a subgraph was
// recorded, (3) validates placeholder accessors per SYCL 2020 4.7.6.9,
// (4) for kernels, resolves/builds the kernel bundle and — when there are no
// requirements, dependent events or streams — enqueues directly, bypassing
// the scheduler (fast path), (5) otherwise constructs a CG object for the
// recorded operation and hands it to the graph (explicit or recorded) or the
// scheduler.
// NOTE(review): this doxygen-derived listing is missing many original source
// lines (gaps in the embedded numbering, e.g. 158, 167, 185, 202, 216,
// 219-220, 293, 333-335, 344, 356, 360, 365, 370, 375, 380, 386, 416, 422,
// 428, 435, 445, 451, 455, 460, 501, 532): several throw statements, switch
// case labels and declarations are truncated. Verify all details against the
// full source before relying on them.
137 event handler::finalize() {
138  // This block of code is needed only for reduction implementation.
139  // It is harmless (does nothing) for everything else.
140  if (MIsFinalized)
141  return MLastEvent;
142  MIsFinalized = true;
143 
144  // If we have a subgraph node that means that a subgraph was recorded as
145  // part of this queue submission, so we skip adding a new node here since
146  // they have already been added, and return the event associated with the
147  // subgraph node.
148  if (MQueue && MQueue->getCommandGraph() && MSubgraphNode) {
149  return detail::createSyclObjFromImpl<event>(
150  MQueue->getCommandGraph()->getEventForNode(MSubgraphNode));
151  }
152 
153  // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed
154  // to a command without being bound to a command group, an exception should
155  // be thrown.
156  {
157  for (const auto &arg : MArgs) {
// NOTE(review): the guard that skips non-accessor arguments (original line
// 158) is missing here.
159  continue;
160 
161  detail::Requirement *AccImpl =
162  static_cast<detail::Requirement *>(arg.MPtr);
163  if (AccImpl->MIsPlaceH) {
164  auto It = std::find(CGData.MRequirements.begin(),
165  CGData.MRequirements.end(), AccImpl);
166  if (It == CGData.MRequirements.end())
168  "placeholder accessor must be bound by calling "
169  "handler::require() before it can be used.");
170  }
171  }
172  }
173 
174  const auto &type = getType();
175  if (type == detail::CG::Kernel) {
176  // If there were uses of set_specialization_constant build the kernel_bundle
177  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
178  getOrInsertHandlerKernelBundle(/*Insert=*/false);
179  if (KernelBundleImpPtr) {
180  // Make sure implicit non-interop kernel bundles have the kernel
181  if (!KernelBundleImpPtr->isInterop() &&
182  !MImpl->isStateExplicitKernelBundle()) {
183  auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
184  kernel_id KernelID =
186  bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
187  // If kernel was not inserted and the bundle is in input mode we try
188  // building it and trying to find the kernel in executable mode
189  if (!KernelInserted &&
190  KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
191  auto KernelBundle =
192  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
193  KernelBundleImpPtr);
194  kernel_bundle<bundle_state::executable> ExecKernelBundle =
195  build(KernelBundle);
196  KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
197  setHandlerKernelBundle(KernelBundleImpPtr);
198  KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
199  }
200  // If the kernel was not found in executable mode we throw an exception
201  if (!KernelInserted)
203  "Failed to add kernel to kernel bundle.");
204  }
205 
206  switch (KernelBundleImpPtr->get_bundle_state()) {
207  case bundle_state::input: {
208  // Underlying level expects kernel_bundle to be in executable state
209  kernel_bundle<bundle_state::executable> ExecBundle = build(
210  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
211  KernelBundleImpPtr));
212  KernelBundleImpPtr = detail::getSyclObjImpl(ExecBundle);
213  setHandlerKernelBundle(KernelBundleImpPtr);
214  break;
215  }
// NOTE(review): the executable-state case label (original line 216) and the
// default/other label (219-220) are missing here.
217  // Nothing to do
218  break;
221  assert(0 && "Expected that the bundle is either in input or executable "
222  "states.");
223  break;
224  }
225  }
226 
// Fast path: no new dependencies, no streams, not recording, not fusing —
// enqueue the kernel directly, skipping CommandGroup/Command creation.
227  if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() &&
228  !MQueue->is_in_fusion_mode() &&
229  CGData.MRequirements.size() + CGData.MEvents.size() +
230  MStreamStorage.size() ==
231  0) {
232  // if user does not add a new dependency to the dependency graph, i.e.
233  // the graph is not changed, and the queue is not in fusion mode, then
234  // this faster path is used to submit kernel bypassing scheduler and
235  // avoiding CommandGroup, Command objects creation.
236 
237  std::vector<sycl::detail::pi::PiEvent> RawEvents;
238  detail::EventImplPtr NewEvent;
239 
240 #ifdef XPTI_ENABLE_INSTRUMENTATION
241  // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent,
242  int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
243  auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
244  StreamID, MKernel, MCodeLoc, MKernelName, MQueue, MNDRDesc,
245  KernelBundleImpPtr, MArgs);
246  auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
247  InstanceID = InstanceID]() {
248 #else
249  auto EnqueueKernel = [&]() {
250 #endif
251  // 'Result' for single point of return
252  pi_int32 Result = PI_ERROR_INVALID_VALUE;
253 #ifdef XPTI_ENABLE_INSTRUMENTATION
254  detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
255  xpti::trace_task_begin, nullptr);
256 #endif
257  if (MQueue->is_host()) {
258  MHostKernel->call(MNDRDesc, (NewEvent)
259  ? NewEvent->getHostProfilingInfo()
260  : nullptr);
261  Result = PI_SUCCESS;
262  } else {
263  if (MQueue->getDeviceImplPtr()->getBackend() ==
264  backend::ext_intel_esimd_emulator) {
265  // Capture the host timestamp for profiling (queue time)
266  if (NewEvent != nullptr)
267  NewEvent->setHostEnqueueTime();
268  MQueue->getPlugin()->call<detail::PiApiKind::piEnqueueKernelLaunch>(
269  nullptr, reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
270  MNDRDesc.Dims, &MNDRDesc.GlobalOffset[0],
271  &MNDRDesc.GlobalSize[0], &MNDRDesc.LocalSize[0], 0, nullptr,
272  nullptr);
273  Result = PI_SUCCESS;
274  } else {
275  Result =
276  enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr,
277  MKernel, MKernelName, RawEvents, NewEvent,
278  nullptr, MImpl->MKernelCacheConfig);
279  }
280  }
281 #ifdef XPTI_ENABLE_INSTRUMENTATION
282  detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
283  xpti::trace_task_end, nullptr);
284 #endif
285  return Result;
286  };
287 
288  bool DiscardEvent = false;
289  if (MQueue->has_discard_events_support()) {
290  // Kernel only uses assert if it's non interop one
291  bool KernelUsesAssert =
292  !(MKernel && MKernel->isInterop()) &&
// NOTE(review): the second operand of the && above (original line 293) is
// missing here.
294  DiscardEvent = !KernelUsesAssert;
295  }
296 
297  if (DiscardEvent) {
298  if (PI_SUCCESS != EnqueueKernel())
299  throw runtime_error("Enqueue process failed.",
300  PI_ERROR_INVALID_OPERATION);
301  } else {
302  NewEvent = std::make_shared<detail::event_impl>(MQueue);
303  NewEvent->setContextImpl(MQueue->getContextImplPtr());
304  NewEvent->setStateIncomplete();
305  NewEvent->setSubmissionTime();
306 
307  if (PI_SUCCESS != EnqueueKernel())
308  throw runtime_error("Enqueue process failed.",
309  PI_ERROR_INVALID_OPERATION);
310  else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr)
311  NewEvent->setComplete();
312 
313  MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
314  }
315  return MLastEvent;
316  }
317  }
318 
// Slow path: build a CG object describing the recorded operation. Several
// case labels of this switch (e.g. CopyAccToPtr/CopyPtrToAcc, UpdateHost,
// PrefetchUSM, AdviseUSM, 2D USM ops, host task, device_global copies, host
// pipe, graph execution, image/semaphore ops) are elided in this listing.
319  std::unique_ptr<detail::CG> CommandGroup;
320  switch (type) {
321  case detail::CG::Kernel: {
322  // Copy kernel name here instead of move so that it's available after
323  // running of this method by reductions implementation. This allows for
324  // assert feature to check if kernel uses assertions
325  CommandGroup.reset(new detail::CGExecKernel(
326  std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
327  std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs),
328  MKernelName, std::move(MStreamStorage),
329  std::move(MImpl->MAuxiliaryResources), MCGType,
330  MImpl->MKernelCacheConfig, MCodeLoc));
331  break;
332  }
336  CommandGroup.reset(
337  new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
338  std::move(MImpl->MAuxiliaryResources), MCodeLoc));
339  break;
340  case detail::CG::Fill:
341  CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr,
342  std::move(CGData), MCodeLoc));
343  break;
345  CommandGroup.reset(
346  new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc));
347  break;
348  case detail::CG::CopyUSM:
349  CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength,
350  std::move(CGData), MCodeLoc));
351  break;
352  case detail::CG::FillUSM:
353  CommandGroup.reset(new detail::CGFillUSM(
354  std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc));
355  break;
357  CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength,
358  std::move(CGData), MCodeLoc));
359  break;
361  CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice,
362  std::move(CGData), MCGType,
363  MCodeLoc));
364  break;
366  CommandGroup.reset(new detail::CGCopy2DUSM(
367  MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
368  MImpl->MHeight, std::move(CGData), MCodeLoc));
369  break;
371  CommandGroup.reset(new detail::CGFill2DUSM(
372  std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
373  MImpl->MHeight, std::move(CGData), MCodeLoc));
374  break;
376  CommandGroup.reset(new detail::CGMemset2DUSM(
377  MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
378  std::move(CGData), MCodeLoc));
379  break;
381  CommandGroup.reset(new detail::CGHostTask(
382  std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
383  std::move(MArgs), std::move(CGData), MCGType, MCodeLoc));
384  break;
385  case detail::CG::Barrier:
387  if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) {
388  // if no event to wait for was specified, we add all exit
389  // nodes/events of the graph
390  if (MEventsWaitWithBarrier.size() == 0) {
391  MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
392  // Graph-wide barriers take precedence over previous one.
393  // We therefore remove the previous ones from ExtraDependencies list.
394  // The current barrier is then added to this list in the graph_impl.
395  std::vector<detail::EventImplPtr> EventsBarriers =
396  GraphImpl->removeBarriersFromExtraDependencies();
397  MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
398  std::begin(EventsBarriers),
399  std::end(EventsBarriers));
400  }
401  CGData.MEvents.insert(std::end(CGData.MEvents),
402  std::begin(MEventsWaitWithBarrier),
403  std::end(MEventsWaitWithBarrier));
404  // Barrier node is implemented as an empty node in Graph
405  // but keep the barrier type to help managing dependencies
406  MCGType = detail::CG::Barrier;
407  CommandGroup.reset(
408  new detail::CG(detail::CG::Barrier, std::move(CGData), MCodeLoc));
409  } else {
410  CommandGroup.reset(
411  new detail::CGBarrier(std::move(MEventsWaitWithBarrier),
412  std::move(CGData), MCGType, MCodeLoc));
413  }
414  break;
415  }
417  CommandGroup.reset(new detail::CGCopyToDeviceGlobal(
418  MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
419  std::move(CGData), MCodeLoc));
420  break;
421  }
423  CommandGroup.reset(new detail::CGCopyFromDeviceGlobal(
424  MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
425  std::move(CGData), MCodeLoc));
426  break;
427  }
429  CommandGroup.reset(new detail::CGReadWriteHostPipe(
430  MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr,
431  MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData),
432  MCodeLoc));
433  break;
434  }
436  // If we have a subgraph node we don't want to actually execute this command
437  // graph submission.
438  if (!MSubgraphNode) {
439  event GraphCompletionEvent =
440  MExecGraph->enqueue(MQueue, std::move(CGData));
441  MLastEvent = GraphCompletionEvent;
442  return MLastEvent;
443  }
444  break;
446  CommandGroup.reset(new detail::CGCopyImage(
447  MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat,
448  MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset,
449  MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc));
450  break;
452  CommandGroup.reset(new detail::CGSemaphoreWait(
453  MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
454  break;
456  CommandGroup.reset(new detail::CGSemaphoreSignal(
457  MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
458  break;
459  case detail::CG::None:
461  std::cout << "WARNING: An empty command group is submitted." << std::endl;
462  }
463 
464  // Empty nodes are handled by Graph like standard nodes
465  // For Standard mode (non-graph),
466  // empty nodes are not sent to the scheduler to save time
467  if (MGraph || (MQueue && MQueue->getCommandGraph())) {
468  CommandGroup.reset(
469  new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc));
470  } else {
471  detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
472  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
473  return MLastEvent;
474  }
475  break;
476  }
477 
// A null CG at this point (other than for subgraph submissions) is an
// internal error: every reachable case above either resets CommandGroup or
// returns early.
478  if (!MSubgraphNode && !CommandGroup)
479  throw sycl::runtime_error(
480  "Internal Error. Command group cannot be constructed.",
481  PI_ERROR_INVALID_OPERATION);
482 
483  // If there is a graph associated with the handler we are in the explicit
484  // graph mode, so we store the CG instead of submitting it to the scheduler,
485  // so it can be retrieved by the graph later.
486  if (MGraph) {
487  MGraphNodeCG = std::move(CommandGroup);
488  return detail::createSyclObjFromImpl<event>(
489  std::make_shared<detail::event_impl>());
490  }
491 
492  // If the queue has an associated graph then we need to take the CG and pass
493  // it to the graph to create a node, rather than submit it to the scheduler.
494  if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
495  auto EventImpl = std::make_shared<detail::event_impl>();
496  std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
497  nullptr;
498 
499  // GraphImpl is read and written in this scope so we lock this graph
500  // with full priviledges.
// NOTE(review): the lock declaration itself (original line 501) is missing
// here; only its second line survives.
502  GraphImpl->MMutex);
503 
504  // Create a new node in the graph representing this command-group
505  if (MQueue->isInOrder()) {
506  // In-order queues create implicit linear dependencies between nodes.
507  // Find the last node added to the graph from this queue, so our new
508  // node can set it as a predecessor.
509  auto DependentNode = GraphImpl->getLastInorderNode(MQueue);
510 
511  NodeImpl = DependentNode
512  ? GraphImpl->add(MCGType, std::move(CommandGroup),
513  {DependentNode})
514  : GraphImpl->add(MCGType, std::move(CommandGroup));
515 
516  // If we are recording an in-order queue remember the new node, so it
517  // can be used as a dependency for any more nodes recorded from this
518  // queue.
519  GraphImpl->setLastInorderNode(MQueue, NodeImpl);
520  } else {
521  NodeImpl = GraphImpl->add(MCGType, std::move(CommandGroup));
522  }
523 
524  // Associate an event with this new node and return the event.
525  GraphImpl->addEventForNode(EventImpl, NodeImpl);
526 
527  EventImpl->setCommandGraph(GraphImpl);
528 
529  return detail::createSyclObjFromImpl<event>(EventImpl);
530  }
531 
// Default path: submit the CG to the scheduler. NOTE(review): the first line
// of this submission statement (original line 532, presumably the scheduler
// call and the declaration of Event) is missing here.
533  std::move(CommandGroup), std::move(MQueue));
534 
535  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
536  return MLastEvent;
537 }
538 
// Keeps a reduction-related allocation alive for the duration of the command
// group by storing it among the handler's auxiliary resources (released when
// the CG completes).
539 void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
540  MImpl->MAuxiliaryResources.push_back(ReduObj);
541 }
542 
// Registers an accessor implementation with this handler: rejects
// write-back-enabled buffers when recording into a command graph, notifies
// the memory object of write-accessor creation, records the accessor as a
// requirement (if its access range is non-empty), stores a copy of the
// accessor, and adds it to the associated-accessors list. AccTarget is the
// integer-encoded access/image target.
// NOTE(review): the throw statement for the write-back-in-graph error is
// truncated in this listing (original line 548 is missing); only its message
// strings survive.
543 void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl,
544  int AccTarget) {
545  if (getCommandGraph() &&
546  static_cast<detail::SYCLMemObjT *>(AccImpl->MSYCLMemObj)
547  ->needsWriteBack()) {
549  "Accessors to buffers which have write_back enabled "
550  "are not allowed to be used in command graphs.");
551  }
552  detail::Requirement *Req = AccImpl.get();
553  if (Req->MAccessMode != sycl::access_mode::read) {
554  auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
555  SYCLMemObj->handleWriteAccessorCreation();
556  }
557  // Add accessor to the list of requirements.
558  if (Req->MAccessRange.size() != 0)
559  CGData.MRequirements.push_back(Req);
560  // Store copy of the accessor.
561  CGData.MAccStorage.push_back(std::move(AccImpl));
562  // Add an accessor to the handler list of associated accessors.
563  // For associated accessors index does not means nothing.
564  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
565  Req, AccTarget, /*index*/ 0);
566 }
567 
568 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
569  access::target AccTarget) {
570  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
571  static_cast<int>(AccTarget));
572 }
573 
574 void handler::associateWithHandler(
575  detail::UnsampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
576  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
577  static_cast<int>(AccTarget));
578 }
579 
580 void handler::associateWithHandler(
581  detail::SampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
582  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
583  static_cast<int>(AccTarget));
584 }
585 
// Appends kernel arguments for a global accessor to Args: the accessor
// itself (at Index + IndexShift), followed — for non-ESIMD kernels not
// created from source — by its access range, memory range and offset as
// three separate std_layout arguments, incrementing IndexShift for each.
// Per-work-item accessors are resized to GlobalSize first.
// NOTE(review): original line 592 is missing from this listing (likely a
// using-declaration or blank line); confirm against the full source.
586 static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
587  size_t &IndexShift, int Size,
588  bool IsKernelCreatedFromSource,
589  size_t GlobalSize,
590  std::vector<detail::ArgDesc> &Args,
591  bool isESIMD) {
593  if (AccImpl->PerWI)
594  AccImpl->resize(GlobalSize);
595 
596  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
597  Index + IndexShift);
598 
599  // TODO ESIMD currently does not suport offset, memory and access ranges -
600  // accessor::init for ESIMD-mode accessor has a single field, translated
601  // to a single kernel argument set above.
602  if (!isESIMD && !IsKernelCreatedFromSource) {
603  // Dimensionality of the buffer is 1 when dimensionality of the
604  // accessor is 0.
605  const size_t SizeAccField =
606  sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
607  ++IndexShift;
608  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
609  &AccImpl->MAccessRange[0], SizeAccField,
610  Index + IndexShift);
611  ++IndexShift;
612  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
613  &AccImpl->MMemoryRange[0], SizeAccField,
614  Index + IndexShift);
615  ++IndexShift;
616  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
617  &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
618  }
619 }
620 
// Translates one kernel argument (described by Kind, Size and Index) into
// one or more entries of MArgs. IndexShift accumulates the extra argument
// slots introduced when a single logical argument expands into several
// physical ones (accessor ranges, the stream's internal accessors).
// NOTE(review): several lines are missing from this listing (original lines
// 624, 683, 722-723, 732-733) — at least some case labels of the inner
// switch over the accessor target (e.g. device and image targets) are
// truncated. Verify the case structure against the full source.
621 void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
622  const int Size, const size_t Index, size_t &IndexShift,
623  bool IsKernelCreatedFromSource, bool IsESIMD) {
625 
626  switch (Kind) {
627  case kernel_param_kind_t::kind_std_layout:
628  case kernel_param_kind_t::kind_pointer: {
629  MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
630  break;
631  }
632  case kernel_param_kind_t::kind_stream: {
633  // Stream contains several accessors inside.
634  stream *S = static_cast<stream *>(Ptr);
635 
636  detail::AccessorBaseHost *GBufBase =
637  static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
638  detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
639  detail::Requirement *GBufReq = GBufImpl.get();
640  addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
641  IsKernelCreatedFromSource,
642  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
643  ++IndexShift;
644  detail::AccessorBaseHost *GOffsetBase =
645  static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
646  detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase);
647  detail::Requirement *GOffsetReq = GOfssetImpl.get();
648  addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
649  IsKernelCreatedFromSource,
650  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
651  ++IndexShift;
652  detail::AccessorBaseHost *GFlushBase =
653  static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
654  detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
655  detail::Requirement *GFlushReq = GFlushImpl.get();
656 
657  size_t GlobalSize = MNDRDesc.GlobalSize.size();
658  // If work group size wasn't set explicitly then it must be recieved
659  // from kernel attribute or set to default values.
660  // For now we can't get this attribute here.
661  // So we just suppose that WG size is always default for stream.
662  // TODO adjust MNDRDesc when device image contains kernel's attribute
663  if (GlobalSize == 0) {
664  // Suppose that work group size is 1 for every dimension
665  GlobalSize = MNDRDesc.NumWorkGroups.size();
666  }
667  addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
668  IsKernelCreatedFromSource, GlobalSize, MArgs,
669  IsESIMD);
670  ++IndexShift;
671  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
672  &S->FlushBufferSize, sizeof(S->FlushBufferSize),
673  Index + IndexShift);
674 
675  break;
676  }
677  case kernel_param_kind_t::kind_accessor: {
678  // For args kind of accessor Size is information about accessor.
679  // The first 11 bits of Size encodes the accessor target.
680  const access::target AccTarget =
681  static_cast<access::target>(Size & AccessTargetMask);
682  switch (AccTarget) {
684  case access::target::constant_buffer: {
685  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
686  addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
687  IsKernelCreatedFromSource,
688  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
689  break;
690  }
691  case access::target::local: {
692  detail::LocalAccessorImplHost *LAcc =
693  static_cast<detail::LocalAccessorImplHost *>(Ptr);
694 
695  range<3> &Size = LAcc->MSize;
696  const int Dims = LAcc->MDims;
697  int SizeInBytes = LAcc->MElemSize;
698  for (int I = 0; I < Dims; ++I)
699  SizeInBytes *= Size[I];
700  // Some backends do not accept zero-sized local memory arguments, so we
701  // make it a minimum allocation of 1 byte.
702  SizeInBytes = std::max(SizeInBytes, 1);
703  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
704  SizeInBytes, Index + IndexShift);
705  // TODO ESIMD currently does not suport MSize field passing yet
706  // accessor::init for ESIMD-mode accessor has a single field, translated
707  // to a single kernel argument set above.
708  if (!IsESIMD && !IsKernelCreatedFromSource) {
709  ++IndexShift;
710  const size_t SizeAccField = Dims * sizeof(Size[0]);
711  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
712  SizeAccField, Index + IndexShift);
713  ++IndexShift;
714  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
715  SizeAccField, Index + IndexShift);
716  ++IndexShift;
717  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
718  SizeAccField, Index + IndexShift);
719  }
720  break;
721  }
724  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
725  MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
726  if (!IsKernelCreatedFromSource) {
727  // TODO Handle additional kernel arguments for image class
728  // if the compiler front-end adds them.
729  }
730  break;
731  }
734  case access::target::host_buffer: {
735  throw sycl::invalid_parameter_error("Unsupported accessor target case.",
736  PI_ERROR_INVALID_OPERATION);
737  break;
738  }
739  }
740  break;
741  }
742  case kernel_param_kind_t::kind_sampler: {
743  MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
744  Index + IndexShift);
745  break;
746  }
747  case kernel_param_kind_t::kind_specialization_constants_buffer: {
748  MArgs.emplace_back(
749  kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
750  Index + IndexShift);
751  break;
752  }
753  case kernel_param_kind_t::kind_invalid:
754  throw runtime_error("Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
755  break;
756  }
757 }
758 
759 // The argument can take up more space to store additional information about
760 // MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
761 // We use the worst-case estimate because the lifetime of the vector is short.
762 // In processArg the kind_stream case introduces the maximum number of
763 // additional arguments. The case adds additional 12 arguments to the currently
764 // processed argument, hence worst-case estimate is 12+1=13.
765 // TODO: the constant can be removed if the size of MArgs will be calculated at
766 // compile time.
// Used as the per-argument multiplier when reserving MArgs in
// extractArgsAndReqs and extractArgsAndReqsFromLambda.
767 inline constexpr size_t MaxNumAdditionalArgs = 13;
768 
769 void handler::extractArgsAndReqs() {
770  assert(MKernel && "MKernel is not initialized");
771  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
772  MArgs.clear();
773 
774  std::sort(
775  UnPreparedArgs.begin(), UnPreparedArgs.end(),
776  [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
777  return (first.MIndex < second.MIndex);
778  });
779 
780  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
781  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());
782 
783  size_t IndexShift = 0;
784  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
785  void *Ptr = UnPreparedArgs[I].MPtr;
786  const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
787  const int &Size = UnPreparedArgs[I].MSize;
788  const int Index = UnPreparedArgs[I].MIndex;
789  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
790  false);
791  }
792 }
793 
// Walks the kernel lambda's captured arguments (described by the KernelArgs
// descriptor array, offsets relative to LambdaPtr) and feeds each to
// processArg. Accessor-typed captures are first replaced by their
// implementation pointers.
// NOTE(review): original lines 805 and 822 are missing from this listing —
// the guard that restricts the pointer rewriting to accessor-kind arguments
// (and a closing brace) appears truncated, so the brace structure shown
// below is not the real one. Verify against the full source.
794 void handler::extractArgsAndReqsFromLambda(
795  char *LambdaPtr, size_t KernelArgsNum,
796  const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
797  const bool IsKernelCreatedFromSource = false;
798  size_t IndexShift = 0;
799  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);
800 
801  for (size_t I = 0; I < KernelArgsNum; ++I) {
802  void *Ptr = LambdaPtr + KernelArgs[I].offset;
803  const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
804  const int &Size = KernelArgs[I].info;
806  // For args kind of accessor Size is information about accessor.
807  // The first 11 bits of Size encodes the accessor target.
808  const access::target AccTarget =
809  static_cast<access::target>(Size & AccessTargetMask);
810  if ((AccTarget == access::target::device ||
811  AccTarget == access::target::constant_buffer) ||
812  (AccTarget == access::target::image ||
813  AccTarget == access::target::image_array)) {
814  detail::AccessorBaseHost *AccBase =
815  static_cast<detail::AccessorBaseHost *>(Ptr);
816  Ptr = detail::getSyclObjImpl(*AccBase).get();
817  } else if (AccTarget == access::target::local) {
818  detail::LocalAccessorBaseHost *LocalAccBase =
819  static_cast<detail::LocalAccessorBaseHost *>(Ptr);
820  Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
821  }
822  }
823  processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
824  IsESIMD);
825  }
826 }
827 
// Calling methods of kernel_impl requires knowledge of the class layout,
// which is not available in headers; this out-of-line helper performs the
// query inside the library and returns the result.
// Returns the function name of the kernel object held in MKernel
// (info::kernel::function_name).
831 std::string handler::getKernelName() {
832  return MKernel->get_info<info::kernel::function_name>();
833 }
834 
// Verifies that an explicitly-set kernel bundle contains the kernel that is
// about to be launched, throwing if it does not. Returns early when no
// bundle is set, or when the bundle is implicit (implicit bundles are
// populated late and are therefore not checked here).
// NOTE(review): the first argument of the throw at the end (original line
// 850, presumably the error code) is missing from this listing.
835 void handler::verifyUsedKernelBundle(const std::string &KernelName) {
836  auto UsedKernelBundleImplPtr =
837  getOrInsertHandlerKernelBundle(/*Insert=*/false);
838  if (!UsedKernelBundleImplPtr)
839  return;
840 
841  // Implicit kernel bundles are populated late so we ignore them
842  if (!MImpl->isStateExplicitKernelBundle())
843  return;
844 
845  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
846  device Dev =
847  MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
848  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
849  throw sycl::exception(
851  "The kernel bundle in use does not contain the kernel");
852 }
853 
854 void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
855  throwIfActionIsCreated();
856  MCGType = detail::CG::BarrierWaitlist;
857  MEventsWaitWithBarrier.resize(WaitList.size());
858  std::transform(
859  WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
860  [](const event &Event) { return detail::getSyclObjImpl(Event); });
861 }
862 
863 using namespace sycl::detail;
864 bool handler::DisableRangeRounding() {
866 }
867 
868 bool handler::RangeRoundingTrace() {
870 }
871 
872 void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
873  size_t &MinRange) {
874  SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
875  MinFactor, GoodFactor, MinRange);
876 }
877 
878 void handler::memcpy(void *Dest, const void *Src, size_t Count) {
879  throwIfActionIsCreated();
880  MSrcPtr = const_cast<void *>(Src);
881  MDstPtr = Dest;
882  MLength = Count;
883  setType(detail::CG::CopyUSM);
884 }
885 
886 void handler::memset(void *Dest, int Value, size_t Count) {
887  throwIfActionIsCreated();
888  MDstPtr = Dest;
889  MPattern.push_back(static_cast<char>(Value));
890  MLength = Count;
891  setType(detail::CG::FillUSM);
892 }
893 
894 void handler::prefetch(const void *Ptr, size_t Count) {
895  throwIfActionIsCreated();
896  MDstPtr = const_cast<void *>(Ptr);
897  MLength = Count;
898  setType(detail::CG::PrefetchUSM);
899 }
900 
901 void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
902  throwIfActionIsCreated();
903  MDstPtr = const_cast<void *>(Ptr);
904  MLength = Count;
905  MImpl->MAdvice = static_cast<pi_mem_advice>(Advice);
906  setType(detail::CG::AdviseUSM);
907 }
908 
909 void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch,
910  const void *Src, size_t SrcPitch,
911  size_t Width, size_t Height) {
912  // Checks done in callers.
913  MSrcPtr = const_cast<void *>(Src);
914  MDstPtr = Dest;
915  MImpl->MSrcPitch = SrcPitch;
916  MImpl->MDstPitch = DestPitch;
917  MImpl->MWidth = Width;
918  MImpl->MHeight = Height;
919  setType(detail::CG::Copy2DUSM);
920 }
921 
922 void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch,
923  const void *Value, size_t ValueSize,
924  size_t Width, size_t Height) {
925  // Checks done in callers.
926  MDstPtr = Dest;
927  MPattern.resize(ValueSize);
928  std::memcpy(MPattern.data(), Value, ValueSize);
929  MImpl->MDstPitch = DestPitch;
930  MImpl->MWidth = Width;
931  MImpl->MHeight = Height;
932  setType(detail::CG::Fill2DUSM);
933 }
934 
935 void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
936  size_t Width, size_t Height) {
937  // Checks done in callers.
938  MDstPtr = Dest;
939  MPattern.push_back(static_cast<char>(Value));
940  MImpl->MDstPitch = DestPitch;
941  MImpl->MWidth = Width;
942  MImpl->MHeight = Height;
943  setType(detail::CG::Memset2DUSM);
944 }
945 
949  throwIfGraphAssociated<
950  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
951  sycl_ext_oneapi_bindless_images>();
952  MSrcPtr = Src;
953  MDstPtr = Dest.raw_handle;
954 
956  PiDesc.image_width = Desc.width;
957  PiDesc.image_height = Desc.height;
958  PiDesc.image_depth = Desc.depth;
959  PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D
960  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D
962 
964  PiFormat.image_channel_data_type =
966  PiFormat.image_channel_order =
968 
969  MImpl->MSrcOffset = {0, 0, 0};
970  MImpl->MDestOffset = {0, 0, 0};
971  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
972  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
973  MImpl->MImageDesc = PiDesc;
974  MImpl->MImageFormat = PiFormat;
975  MImpl->MImageCopyFlags =
977  setType(detail::CG::CopyImage);
978 }
979 
981  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
984  sycl::range<3> CopyExtent) {
985  throwIfGraphAssociated<
986  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
987  sycl_ext_oneapi_bindless_images>();
988  MSrcPtr = Src;
989  MDstPtr = Dest.raw_handle;
990 
992  PiDesc.image_width = DestImgDesc.width;
993  PiDesc.image_height = DestImgDesc.height;
994  PiDesc.image_depth = DestImgDesc.depth;
995  PiDesc.image_type = DestImgDesc.depth > 0
997  : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
999 
1001  PiFormat.image_channel_data_type =
1003  PiFormat.image_channel_order =
1005 
1006  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1007  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1008  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1009  MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]};
1010  MImpl->MImageDesc = PiDesc;
1011  MImpl->MImageFormat = PiFormat;
1012  MImpl->MImageCopyFlags =
1014  setType(detail::CG::CopyImage);
1015 }
1016 
1020  throwIfGraphAssociated<
1021  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1022  sycl_ext_oneapi_bindless_images>();
1023  MSrcPtr = Src.raw_handle;
1024  MDstPtr = Dest;
1025 
1027  PiDesc.image_width = Desc.width;
1028  PiDesc.image_height = Desc.height;
1029  PiDesc.image_depth = Desc.depth;
1030  PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D
1031  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1033 
1035  PiFormat.image_channel_data_type =
1037  PiFormat.image_channel_order =
1039 
1040  MImpl->MSrcOffset = {0, 0, 0};
1041  MImpl->MDestOffset = {0, 0, 0};
1042  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
1043  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
1044  MImpl->MImageDesc = PiDesc;
1045  MImpl->MImageFormat = PiFormat;
1046  MImpl->MImageCopyFlags =
1048  setType(detail::CG::CopyImage);
1049 }
1050 
1053  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1054  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1055  sycl::range<3> CopyExtent) {
1056  throwIfGraphAssociated<
1057  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1058  sycl_ext_oneapi_bindless_images>();
1059  MSrcPtr = Src.raw_handle;
1060  MDstPtr = Dest;
1061 
1063  PiDesc.image_width = SrcImgDesc.width;
1064  PiDesc.image_height = SrcImgDesc.height;
1065  PiDesc.image_depth = SrcImgDesc.depth;
1066  PiDesc.image_type =
1067  SrcImgDesc.depth > 0
1069  : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
1070 
1072  PiFormat.image_channel_data_type =
1074  PiFormat.image_channel_order =
1076 
1077  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1078  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1079  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1080  MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]};
1081  MImpl->MImageDesc = PiDesc;
1082  MImpl->MImageFormat = PiFormat;
1083  MImpl->MImageCopyFlags =
1085  setType(detail::CG::CopyImage);
1086 }
1087 
1089  void *Src, void *Dest,
1090  const ext::oneapi::experimental::image_descriptor &Desc, size_t Pitch) {
1091  throwIfGraphAssociated<
1092  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1093  sycl_ext_oneapi_bindless_images>();
1094  MSrcPtr = Src;
1095  MDstPtr = Dest;
1096 
1098  PiDesc.image_width = Desc.width;
1099  PiDesc.image_height = Desc.height;
1100  PiDesc.image_depth = Desc.depth;
1101  PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D
1102  : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1104 
1106  PiFormat.image_channel_data_type =
1108  PiFormat.image_channel_order =
1110 
1111  MImpl->MSrcOffset = {0, 0, 0};
1112  MImpl->MDestOffset = {0, 0, 0};
1113  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
1114  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
1115  MImpl->MImageDesc = PiDesc;
1116  MImpl->MImageDesc.image_row_pitch = Pitch;
1117  MImpl->MImageFormat = PiFormat;
1118  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
1119  get_pointer_type(Src, MQueue->get_context()),
1120  get_pointer_type(Dest, MQueue->get_context()));
1121  setType(detail::CG::CopyImage);
1122 }
1123 
1125  void *Src, sycl::range<3> SrcOffset, void *Dest, sycl::range<3> DestOffset,
1126  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1127  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1128  sycl::range<3> CopyExtent) {
1129  throwIfGraphAssociated<
1130  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1131  sycl_ext_oneapi_bindless_images>();
1132  MSrcPtr = Src;
1133  MDstPtr = Dest;
1134 
1136  PiDesc.image_width = DeviceImgDesc.width;
1137  PiDesc.image_height = DeviceImgDesc.height;
1138  PiDesc.image_depth = DeviceImgDesc.depth;
1139  PiDesc.image_type = DeviceImgDesc.depth > 0
1141  : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
1143 
1145  PiFormat.image_channel_data_type =
1147  PiFormat.image_channel_order =
1149 
1150  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1151  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1152  MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]};
1153  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1154  MImpl->MImageDesc = PiDesc;
1155  MImpl->MImageDesc.image_row_pitch = DeviceRowPitch;
1156  MImpl->MImageFormat = PiFormat;
1157  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
1158  get_pointer_type(Src, MQueue->get_context()),
1159  get_pointer_type(Dest, MQueue->get_context()));
1160  setType(detail::CG::CopyImage);
1161 }
1162 
1165  throwIfGraphAssociated<
1166  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1167  sycl_ext_oneapi_bindless_images>();
1168  MImpl->MInteropSemaphoreHandle =
1170  setType(detail::CG::SemaphoreWait);
1171 }
1172 
1175  throwIfGraphAssociated<
1176  ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1177  sycl_ext_oneapi_bindless_images>();
1178  MImpl->MInteropSemaphoreHandle =
1180  setType(detail::CG::SemaphoreSignal);
1181 }
1182 
1184  const kernel_bundle<bundle_state::executable> &ExecBundle) {
1185  std::shared_ptr<detail::queue_impl> PrimaryQueue =
1186  MImpl->MSubmissionPrimaryQueue;
1187  if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) ||
1188  (MGraph && (MGraph->getContext() != ExecBundle.get_context())))
1189  throw sycl::exception(
1191  "Context associated with the primary queue is different from the "
1192  "context associated with the kernel bundle");
1193 
1194  std::shared_ptr<detail::queue_impl> SecondaryQueue =
1195  MImpl->MSubmissionSecondaryQueue;
1196  if (SecondaryQueue &&
1197  SecondaryQueue->get_context() != ExecBundle.get_context())
1198  throw sycl::exception(
1200  "Context associated with the secondary queue is different from the "
1201  "context associated with the kernel bundle");
1202 
1203  setStateExplicitKernelBundle();
1204  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
1205 }
1206 
1208  auto EventImpl = detail::getSyclObjImpl(Event);
1209  if (EventImpl->isDiscarded()) {
1211  "Queue operation cannot depend on discarded event.");
1212  }
1213  if (auto Graph = getCommandGraph(); Graph) {
1214  auto EventGraph = EventImpl->getCommandGraph();
1215  if (EventGraph == nullptr) {
1216  throw sycl::exception(
1218  "Graph nodes cannot depend on events from outside the graph.");
1219  }
1220  if (EventGraph != Graph) {
1221  throw sycl::exception(
1223  "Graph nodes cannot depend on events from another graph.");
1224  }
1225  }
1226  CGData.MEvents.push_back(EventImpl);
1227 }
1228 
1229 void handler::depends_on(const std::vector<event> &Events) {
1230  for (const event &Event : Events) {
1231  depends_on(Event);
1232  }
1233 }
1234 
1235 static bool
1236 checkContextSupports(const std::shared_ptr<detail::context_impl> &ContextImpl,
1237  sycl::detail::pi::PiContextInfo InfoQuery) {
1238  auto &Plugin = ContextImpl->getPlugin();
1239  pi_bool SupportsOp = false;
1240  Plugin->call<detail::PiApiKind::piContextGetInfo>(ContextImpl->getHandleRef(),
1241  InfoQuery, sizeof(pi_bool),
1242  &SupportsOp, nullptr);
1243  return SupportsOp;
1244 }
1245 
1246 bool handler::supportsUSMMemcpy2D() {
1247  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1248  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1249  if (QueueImpl &&
1250  !checkContextSupports(QueueImpl->getContextImplPtr(),
1252  return false;
1253  }
1254  return true;
1255 }
1256 
1257 bool handler::supportsUSMFill2D() {
1258  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1259  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1260  if (QueueImpl &&
1261  !checkContextSupports(QueueImpl->getContextImplPtr(),
1263  return false;
1264  }
1265  return true;
1266 }
1267 
1268 bool handler::supportsUSMMemset2D() {
1269  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
1270  {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1271  if (QueueImpl &&
1272  !checkContextSupports(QueueImpl->getContextImplPtr(),
1274  return false;
1275  }
1276  return true;
1277 }
1278 
1279 id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) {
1280  device Dev = MQueue->get_device();
1281  range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
1282  Dev.get_info<info::device::max_compute_units>();
1283  return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
1284 }
1285 
1286 void handler::ext_intel_read_host_pipe(const std::string &Name, void *Ptr,
1287  size_t Size, bool Block) {
1288  MImpl->HostPipeName = Name;
1289  MImpl->HostPipePtr = Ptr;
1290  MImpl->HostPipeTypeSize = Size;
1291  MImpl->HostPipeBlocking = Block;
1292  MImpl->HostPipeRead = 1;
1294 }
1295 
1296 void handler::ext_intel_write_host_pipe(const std::string &Name, void *Ptr,
1297  size_t Size, bool Block) {
1298  MImpl->HostPipeName = Name;
1299  MImpl->HostPipePtr = Ptr;
1300  MImpl->HostPipeTypeSize = Size;
1301  MImpl->HostPipeBlocking = Block;
1302  MImpl->HostPipeRead = 0;
1304 }
1305 
1306 void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src,
1307  bool IsDeviceImageScoped, size_t NumBytes,
1308  size_t Offset) {
1309  throwIfActionIsCreated();
1310  MSrcPtr = const_cast<void *>(Src);
1311  MDstPtr = const_cast<void *>(DeviceGlobalPtr);
1312  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1313  MLength = NumBytes;
1314  MImpl->MOffset = Offset;
1316 }
1317 
1318 void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
1319  bool IsDeviceImageScoped, size_t NumBytes,
1320  size_t Offset) {
1321  throwIfActionIsCreated();
1322  MSrcPtr = const_cast<void *>(DeviceGlobalPtr);
1323  MDstPtr = Dest;
1324  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1325  MLength = NumBytes;
1326  MImpl->MOffset = Offset;
1328 }
1329 
1330 void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr,
1331  const void *Src,
1332  size_t DeviceGlobalTSize,
1333  bool IsDeviceImageScoped,
1334  size_t NumBytes, size_t Offset) {
1335  std::weak_ptr<detail::context_impl> WeakContextImpl =
1336  MQueue->getContextImplPtr();
1337  std::weak_ptr<detail::device_impl> WeakDeviceImpl =
1338  MQueue->getDeviceImplPtr();
1339  host_task([=] {
1340  // Capture context and device as weak to avoid keeping them alive for too
1341  // long. If they are dead by the time this executes, the operation would not
1342  // have been visible anyway.
1343  std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
1344  std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
1345  if (ContextImpl && DeviceImpl)
1346  ContextImpl->memcpyToHostOnlyDeviceGlobal(
1347  DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
1348  IsDeviceImageScoped, NumBytes, Offset);
1349  });
1350 }
1351 
1352 void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest,
1353  const void *DeviceGlobalPtr,
1354  bool IsDeviceImageScoped,
1355  size_t NumBytes, size_t Offset) {
1356  const std::shared_ptr<detail::context_impl> &ContextImpl =
1357  MQueue->getContextImplPtr();
1358  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1359  MQueue->getDeviceImplPtr();
1360  host_task([=] {
1361  // Unlike memcpy to device_global, we need to keep the context and device
1362  // alive in the capture of this operation as we must be able to correctly
1363  // copy the value to the user-specified pointer.
1364  ContextImpl->memcpyFromHostOnlyDeviceGlobal(
1365  DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
1366  Offset);
1367  });
1368 }
1369 
1370 const std::shared_ptr<detail::context_impl> &
1371 handler::getContextImplPtr() const {
1372  return MQueue->getContextImplPtr();
1373 }
1374 
1375 void handler::setKernelCacheConfig(
1377  MImpl->MKernelCacheConfig = Config;
1378 }
1379 
1383  Graph) {
1385  auto GraphImpl = detail::getSyclObjImpl(Graph);
1386  // GraphImpl is only read in this scope so we lock this graph for read only
1388  GraphImpl->MMutex);
1389 
1390  std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph;
1391  if (MQueue) {
1392  ParentGraph = MQueue->getCommandGraph();
1393  } else {
1394  ParentGraph = MGraph;
1395  }
1396 
1398  // If a parent graph is set that means we are adding or recording a subgraph
1399  if (ParentGraph) {
1400  // ParentGraph is read and written in this scope so we lock this graph
1401  // with full priviledges.
1402  // We only lock for Record&Replay API because the graph has already been
1403  // lock if this function was called from the explicit API function add
1404  if (MQueue) {
1406  ParentGraph->MMutex);
1407  }
1408  // Store the node representing the subgraph in the handler so that we can
1409  // return it to the user later.
1410  // The nodes of the subgraph are duplicated when added to its parents.
1411  // This avoids changing properties of the graph added as a subgraph.
1412  MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl);
1413 
1414  // If we are recording an in-order queue remember the subgraph node, so it
1415  // can be used as a dependency for any more nodes recorded from this queue.
1416  if (MQueue && MQueue->isInOrder()) {
1417  ParentGraph->setLastInorderNode(MQueue, MSubgraphNode);
1418  }
1419  // Associate an event with the subgraph node.
1420  auto SubgraphEvent = std::make_shared<event_impl>();
1421  SubgraphEvent->setCommandGraph(ParentGraph);
1422  ParentGraph->addEventForNode(SubgraphEvent, MSubgraphNode);
1423  } else {
1424  // Set the exec graph for execution during finalize.
1425  MExecGraph = GraphImpl;
1426  }
1427 }
1428 
1429 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1430 handler::getCommandGraph() const {
1431  if (MGraph) {
1432  return MGraph;
1433  }
1434  return MQueue->getCommandGraph();
1435 }
1436 
1437 std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
1439  std::array<size_t, 3> PiResult = {};
1440  auto Ret = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1441  Dev->getHandleRef(),
1442  PiInfoCode<
1443  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1444  sizeof(PiResult), &PiResult, nullptr);
1445  if (Ret == PI_SUCCESS) {
1446  return PiResult;
1447  }
1448  return {};
1449 }
1450 
1451 } // namespace _V1
1452 } // namespace sycl
sycl::_V1::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:795
sycl::_V1::detail::NDRDescT::NumWorkGroups
sycl::range< 3 > NumWorkGroups
Number of workgroups, used to record the number of workgroups from the simplest form of parallel_for_...
Definition: cg_types.hpp:134
sycl::_V1::kernel_bundle::get_context
context get_context() const noexcept
Definition: kernel_bundle.hpp:240
sycl::_V1::detail::AccessorImplPtr
std::shared_ptr< AccessorImplHost > AccessorImplPtr
Definition: accessor.hpp:532
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:531
sycl::_V1::handler::mem_advise
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:901
sycl::_V1::detail::DeviceGlobalMapEntry::MImageIdentifiers
std::set< std::uintptr_t > MImageIdentifiers
Definition: device_global_map_entry.hpp:61
sycl::_V1::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2784
sycl::_V1::detail::AccessorImplHost::MOffset
id< 3 > & MOffset
Definition: accessor_impl.hpp:125
_pi_context_info
_pi_context_info
Definition: pi.h:448
sycl::_V1::detail::CG::Fill2DUSM
@ Fill2DUSM
Definition: cg.hpp:68
PI_MEM_TYPE_IMAGE1D
@ PI_MEM_TYPE_IMAGE1D
Definition: pi.h:562
pi.h
sycl::_V1::detail::SYCL_STREAM_NAME
constexpr const char * SYCL_STREAM_NAME
Definition: xpti_registry.hpp:29
sycl::_V1::get_pointer_type
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:556
pi_bool
pi_uint32 pi_bool
Definition: pi.h:196
_pi_image_format::image_channel_data_type
pi_image_channel_type image_channel_data_type
Definition: pi.h:1100
sycl::_V1::detail::CG::CopyToDeviceGlobal
@ CopyToDeviceGlobal
Definition: cg.hpp:70
graph_impl.hpp
sycl::_V1::detail::NDRDescT::GlobalSize
sycl::range< 3 > GlobalSize
Definition: cg_types.hpp:128
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:94
sycl::_V1::detail::CG::StorageInitHelper::MEvents
std::vector< detail::EventImplPtr > MEvents
List of events that order the execution of this CG.
Definition: cg.hpp:105
config.hpp
sycl::_V1::detail::kernel_param_kind_t
kernel_param_kind_t
Definition: kernel_desc.hpp:38
sycl::_V1::detail::CG::StorageInitHelper::MRequirements
std::vector< AccessorImplHost * > MRequirements
List of requirements that specify which memory is needed for the command group to be executed.
Definition: cg.hpp:103
sycl::_V1::ext::oneapi::experimental::image_descriptor::channel_order
image_channel_order channel_order
Definition: bindless_images_descriptor.hpp:38
_pi_image_format::image_channel_order
pi_image_channel_order image_channel_order
Definition: pi.h:1099
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
Definition: pi.h:461
sycl::_V1::detail::AccessorImplHost
Definition: accessor_impl.hpp:42
_pi_image_desc::image_type
pi_mem_type image_type
Definition: pi.h:1104
_pi_image_copy_flags
_pi_image_copy_flags
Definition: pi.h:619
sycl::_V1::bundle_state::ext_oneapi_source
@ ext_oneapi_source
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:567
sycl::_V1::detail::pi::PiInteropSemaphoreHandle
::pi_interop_semaphore_handle PiInteropSemaphoreHandle
Definition: pi.hpp:164
sycl::_V1::detail::CG::PrefetchUSM
@ PrefetchUSM
Definition: cg.hpp:64
sycl::_V1::detail::AccessorImplHost::PerWI
bool PerWI
Definition: accessor_impl.hpp:143
sycl::_V1::ext::oneapi::experimental::image_descriptor::channel_type
image_channel_type channel_type
Definition: bindless_images_descriptor.hpp:39
sycl::_V1::detail::get_kernel_id_impl
kernel_id get_kernel_id_impl(std::string KernelName)
Definition: kernel_bundle.cpp:140
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
sycl::_V1::errc::kernel_argument
@ kernel_argument
sycl::_V1::detail::kernel_param_kind_t::kind_accessor
@ kind_accessor
PI_IMAGE_COPY_DEVICE_TO_HOST
@ PI_IMAGE_COPY_DEVICE_TO_HOST
Definition: pi.h:621
sycl::_V1::bundle_state::object
@ object
sycl::_V1::detail::CG::SemaphoreSignal
@ SemaphoreSignal
Definition: cg.hpp:76
sycl::_V1::ext::oneapi::experimental::image_descriptor::height
size_t height
Definition: bindless_images_descriptor.hpp:36
usm_impl.hpp
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:119
event.hpp
sycl::_V1::handler::host_task
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
Definition: handler.hpp:1939
sycl::_V1::detail::DeviceGlobalMapEntry
Definition: device_global_map_entry.hpp:52
sycl::_V1::access::target::host_image
@ host_image
sycl::_V1::handler::memset
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:886
helpers.hpp
sycl::_V1::detail::CG::AdviseUSM
@ AdviseUSM
Definition: cg.hpp:66
sycl
Definition: access.hpp:18
sycl::_V1::detail::CG::None
@ None
Definition: cg.hpp:53
_pi_image_desc::image_height
size_t image_height
Definition: pi.h:1106
sycl::_V1::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
sycl::_V1::detail::CG::Kernel
@ Kernel
Definition: cg.hpp:54
queue_impl.hpp
sycl::_V1::MaxNumAdditionalArgs
constexpr size_t MaxNumAdditionalArgs
Definition: handler.cpp:767
pi.hpp
sycl::_V1::checkContextSupports
static bool checkContextSupports(const std::shared_ptr< detail::context_impl > &ContextImpl, sycl::detail::pi::PiContextInfo InfoQuery)
Definition: handler.cpp:1236
sycl::_V1::detail::Requirement
AccessorImplHost Requirement
Definition: accessor_impl.hpp:210
scheduler.hpp
sycl::_V1::detail::AccessorImplHost::MMemoryRange
range< 3 > & MMemoryRange
Definition: accessor_impl.hpp:129
PI_IMAGE_COPY_DEVICE_TO_DEVICE
@ PI_IMAGE_COPY_DEVICE_TO_DEVICE
Definition: pi.h:622
_pi_kernel
Definition: pi_cuda.hpp:72
sycl::_V1::ext::oneapi::experimental::image_mem_handle
Opaque image memory handle type.
Definition: bindless_images_memory.hpp:31
sycl::_V1::detail::CG::CopyFromDeviceGlobal
@ CopyFromDeviceGlobal
Definition: cg.hpp:71
sycl::_V1::errc::runtime
@ runtime
handler_impl.hpp
sycl::_V1::detail::CG::CopyAccToPtr
@ CopyAccToPtr
Definition: cg.hpp:55
sycl::_V1::handler::ext_oneapi_wait_external_semaphore
void ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: handler.cpp:1163
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:462
sycl::_V1::detail::ProgramManager::getInstance
static ProgramManager & getInstance()
Definition: program_manager.cpp:69
sycl::_V1::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:52
sycl::_V1::detail::CG::StorageInitHelper::MAccStorage
std::vector< detail::AccessorImplPtr > MAccStorage
Storage for accessors.
Definition: cg.hpp:97
sycl::_V1::detail::AccessorImplHost::resize
void resize(size_t GlobalSize)
Definition: accessor_impl.cpp:27
sycl::_V1::ext::oneapi::experimental::detail::graph_impl::ReadLock
std::shared_lock< std::shared_mutex > ReadLock
Definition: graph_impl.hpp:465
sycl::_V1::handler::ext_oneapi_copy
void ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: handler.cpp:946
sycl::_V1::detail::ProgramManager::getSYCLKernelID
kernel_id getSYCLKernelID(const std::string &KernelName)
Definition: program_manager.cpp:1674
sycl::_V1::detail::CG::SemaphoreWait
@ SemaphoreWait
Definition: cg.hpp:75
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
sycl::_V1::detail::CG::ReadWriteHostPipe
@ ReadWriteHostPipe
Definition: cg.hpp:72
_pi_image_desc::image_depth
size_t image_depth
Definition: pi.h:1107
sycl::_V1::handler::ext_oneapi_signal_external_semaphore
void ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: handler.cpp:1173
sycl::_V1::detail::pi::PiResult
::pi_result PiResult
Definition: pi.hpp:128
sycl::_V1::ext::oneapi::experimental::image_descriptor::width
size_t width
Definition: bindless_images_descriptor.hpp:35
PI_MEM_TYPE_IMAGE2D
@ PI_MEM_TYPE_IMAGE2D
Definition: pi.h:559
sycl::_V1::access::target::host_task
@ host_task
image_impl.hpp
sycl::_V1::ext::oneapi::experimental::image_descriptor
A struct to describe the properties of an image.
Definition: bindless_images_descriptor.hpp:34
sycl::_V1::access::target::image
@ image
sycl::_V1::ext::oneapi::experimental::assert
assert(false)
sycl::_V1::handler::use_kernel_bundle
void use_kernel_bundle(const kernel_bundle< bundle_state::executable > &ExecBundle)
Definition: handler.cpp:1183
sycl::_V1::detail::CG::Copy2DUSM
@ Copy2DUSM
Definition: cg.hpp:67
commands.hpp
sycl::_V1::detail::CG::CopyPtrToAcc
@ CopyPtrToAcc
Definition: cg.hpp:56
sycl::_V1::handler
Command group handler class.
Definition: handler.hpp:459
sycl::_V1::usm::alloc
alloc
Definition: usm_enums.hpp:14
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:73
sycl::_V1::exception
Definition: exception.hpp:68
sycl::_V1::kernel_bundle< bundle_state::executable >
common.hpp
sycl::_V1::ext::oneapi::experimental::detail::graph_impl::WriteLock
std::unique_lock< std::shared_mutex > WriteLock
Definition: graph_impl.hpp:466
sycl::_V1::detail::Scheduler::addCG
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group, and adds it to the dependency graph.
Definition: scheduler.cpp:94
global_handler.hpp
sycl::_V1::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:48
sycl::_V1::handler::ext_oneapi_graph
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
Definition: handler.cpp:1380
sycl::_V1::ext::oneapi::experimental::image_mem_handle::raw_handle
handle_type raw_handle
Definition: bindless_images_memory.hpp:33
sycl::_V1::detail::CG::Fill
@ Fill
Definition: cg.hpp:60
sycl::_V1::access::target
target
Definition: access.hpp:22
PI_IMAGE_COPY_HOST_TO_DEVICE
@ PI_IMAGE_COPY_HOST_TO_DEVICE
Definition: pi.h:620
sycl::_V1::access::target::device
@ device
sycl::_V1::detail::EventImplPtr
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:39
sycl::_V1::detail::ProgramManager::kernelUsesAssert
bool kernelUsesAssert(const std::string &KernelName) const
Definition: program_manager.cpp:1359
sycl::_V1::bundle_state::input
@ input
sycl::_V1::errc::invalid
@ invalid
sycl::_V1::detail::CG::ExecCommandBuffer
@ ExecCommandBuffer
Definition: cg.hpp:73
_pi_image_format
Definition: pi.h:1098
sycl::_V1::detail::CG::Memset2DUSM
@ Memset2DUSM
Definition: cg.hpp:69
sycl::_V1::errc::kernel_not_supported
@ kernel_not_supported
sycl::_V1::detail::CG::BarrierWaitlist
@ BarrierWaitlist
Definition: cg.hpp:59
_pi_image_desc::image_width
size_t image_width
Definition: pi.h:1105
sycl::_V1::detail::NDRDescT::LocalSize
sycl::range< 3 > LocalSize
Definition: cg_types.hpp:129
sycl::_V1::ext::oneapi::experimental::arg
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, _Tp > arg(const complex< _Tp > &__c)
Definition: complex_math.hpp:131
sycl::_V1::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:894
sycl::_V1::detail::NDRDescT::GlobalOffset
sycl::id< 3 > GlobalOffset
Definition: cg_types.hpp:130
handler.hpp
_pi_image_desc::image_row_pitch
size_t image_row_pitch
Definition: pi.h:1109
_pi_image_desc
Definition: pi.h:1103
sycl::_V1::handler::memcpy
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:878
std
Definition: accessor.hpp:4171
kernel_desc.hpp
sycl::_V1::detail::ProgramManager::getDeviceGlobalEntry
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
Definition: program_manager.cpp:1751
sycl::_V1::detail::getDeviceFromHandler
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
Definition: accessor.cpp:16
sycl::_V1::detail::getPiImageCopyFlags
sycl::detail::pi::PiImageCopyFlags getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType)
Definition: handler.cpp:45
sycl::_V1::detail::CG::CopyUSM
@ CopyUSM
Definition: cg.hpp:62
sycl::_V1::ext::oneapi::experimental::image_descriptor::depth
size_t depth
Definition: bindless_images_descriptor.hpp:37
sycl::_V1::detail::CG::FillUSM
@ FillUSM
Definition: cg.hpp:63
sycl::_V1::detail::Scheduler::getInstance
static Scheduler & getInstance()
Definition: scheduler.cpp:260
sycl::_V1::detail::pi::PI_TRACE_ALL
@ PI_TRACE_ALL
Definition: pi.hpp:60
kernel_bundle_impl.hpp
sycl::_V1::detail::CG::Barrier
@ Barrier
Definition: cg.hpp:58
stream.hpp
info_desc.hpp
sycl::_V1::ext::oneapi::experimental::interop_semaphore_handle::raw_handle
raw_handle_type raw_handle
Definition: bindless_images_interop.hpp:45
sycl::_V1::detail::CG::CopyAccToAcc
@ CopyAccToAcc
Definition: cg.hpp:57
kernel_impl.hpp
sycl::_V1::detail::AccessorImplHost::MAccessRange
range< 3 > & MAccessRange
Definition: accessor_impl.hpp:127
sycl::_V1::ext::oneapi::experimental::command_graph
Graph in the modifiable state.
Definition: graph.hpp:151
sycl::_V1::detail::AccessorImplHost::MDims
unsigned int MDims
Definition: accessor_impl.hpp:134
sycl::_V1::detail::CG::CodeplayHostTask
@ CodeplayHostTask
Definition: cg.hpp:65
sycl::_V1::detail::isDeviceGlobalUsedInKernel
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
Definition: handler.cpp:37
sycl::_V1::detail::convertChannelType
sycl::detail::pi::PiMemImageChannelType convertChannelType(image_channel_type Type)
Definition: image_impl.cpp:187
sycl::_V1::ext::oneapi::experimental::interop_semaphore_handle
Opaque interop semaphore handle type.
Definition: bindless_images_interop.hpp:43
sycl::_V1::detail::pi::trace
bool trace(TraceLevel level)
Definition: pi.cpp:423
sycl::_V1::detail::convertChannelOrder
sycl::detail::pi::PiMemImageChannelOrder convertChannelOrder(image_channel_order Order)
Definition: image_impl.cpp:111
std::cout
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
_pi_kernel_cache_config
_pi_kernel_cache_config
Definition: pi.h:744
sycl::_V1::handler::depends_on
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1207
sycl::_V1::addArgsForGlobalAccessor
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
Definition: handler.cpp:586
PI_MEM_TYPE_IMAGE3D
@ PI_MEM_TYPE_IMAGE3D
Definition: pi.h:560
sycl::_V1::bundle_state::executable
@ executable
sycl::_V1::detail::NDRDescT::Dims
size_t Dims
Definition: cg_types.hpp:135
sycl::_V1::image_target
image_target
Definition: access.hpp:74
sycl::_V1::detail::CG::CopyImage
@ CopyImage
Definition: cg.hpp:74
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
Definition: pi.h:460
sycl::_V1::detail::CG::UpdateHost
@ UpdateHost
Definition: cg.hpp:61
pi_int32
int32_t pi_int32
Definition: pi.h:193
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
sycl::_V1::detail::enqueueImpKernel
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig)
Definition: commands.cpp:2498
sycl::_V1::handler::stream
friend class stream
Definition: handler.hpp:3360
sycl::_V1::access::target::image_array
@ image_array
Definition: accessor.hpp:3334
sycl::_V1::ext::oneapi::experimental::graph_state::executable
@ executable
In executable state, the graph is ready to execute.