DPC++ Runtime
Runtime libraries for oneAPI DPC++
commands.hpp
Go to the documentation of this file.
1 //==-------------- commands.hpp - SYCL standard header file ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <atomic>
12 #include <cstdint>
13 #include <deque>
14 #include <memory>
15 #include <optional>
16 #include <set>
17 #include <unordered_set>
18 #include <vector>
19 
20 #include <detail/accessor_impl.hpp>
21 #include <detail/event_impl.hpp>
23 #include <sycl/access/access.hpp>
24 #include <sycl/detail/cg.hpp>
25 
26 namespace sycl {
27 inline namespace _V1 {
28 
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
31 class node_impl;
32 } // namespace ext::oneapi::experimental::detail
33 namespace detail {
34 
// The two declarations below exist only when XPTI_ENABLE_INSTRUMENTATION is
// defined; only their prototypes are visible in this header.
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 bool CurrentCodeLocationValid();
// NOTE(review): presumably emits a trace event of kind Type for TraceEvent on
// stream StreamID — confirm against the definition.
37 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
38  xpti_td *TraceEvent, uint16_t Type,
39  const void *Addr);
40 #endif
41 
// Forward declarations of runtime implementation classes referenced by the
// shared_ptr aliases and by the friend declaration in Command.
42 class queue_impl;
43 class event_impl;
44 class context_impl;
45 class DispatchHostTask;
46 
// Shared-pointer aliases for the implementation classes.
// NOTE(review): stream_impl is not forward-declared in the visible part of
// this header; presumably one of the includes provides it — confirm.
47 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
48 using EventImplPtr = std::shared_ptr<detail::event_impl>;
49 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
50 using StreamImplPtr = std::shared_ptr<detail::stream_impl>;
51 
// Forward declarations of the command classes, so they can be named before
// their definitions below.
52 class Command;
53 class AllocaCommand;
54 class AllocaCommandBase;
55 class ReleaseCommand;
56 class ExecCGCommand;
57 class EmptyCommand;
58 
60 
63  enum ResultT {
68  };
70  pi_int32 ErrCode = PI_SUCCESS)
71  : MResult(Result), MCmd(Cmd), MErrCode(ErrCode) {}
78 };
79 
81 struct DepDesc {
82  DepDesc(Command *DepCommand, const Requirement *Req,
83  AllocaCommandBase *AllocaCmd)
84  : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) {}
85 
86  friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs) {
87  return std::tie(Lhs.MDepRequirement, Lhs.MDepCommand) <
89  }
90 
92  Command *MDepCommand = nullptr;
94  const Requirement *MDepRequirement = nullptr;
98 };
99 
107 class Command {
108 public:
109  enum CommandType {
123  };
124 
125  Command(CommandType Type, QueueImplPtr Queue,
126  sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr,
127  const std::vector<sycl::detail::pi::PiExtSyncPoint> &SyncPoints = {});
128 
132  [[nodiscard]] Command *addDep(DepDesc NewDep,
133  std::vector<Command *> &ToCleanUp);
134 
138  [[nodiscard]] Command *addDep(EventImplPtr Event,
139  std::vector<Command *> &ToCleanUp);
140 
141  void addUser(Command *NewUser) { MUsers.insert(NewUser); }
142 
144  CommandType getType() const { return MType; }
145 
153  virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
154  std::vector<Command *> &ToCleanUp);
155 
156  bool isFinished();
157 
158  bool isSuccessfullyEnqueued() const {
160  }
161 
162  // Indicates that the command could not be enqueued; currently this can only
163  // be true for an empty task.
164  bool isEnqueueBlocked() const {
166  }
167  // Indicates that the command can be enqueued but blocks the enqueue of all
168  // commands depending on it. Typical case: a host task.
169  bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); }
170 
171  void addBlockedUserUnique(const EventImplPtr &NewUser) {
172  if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) !=
173  MBlockedUsers.end())
174  return;
175  MBlockedUsers.push_back(NewUser);
176  }
177 
178  const QueueImplPtr &getQueue() const { return MQueue; }
179 
180  const EventImplPtr &getEvent() const { return MEvent; }
181 
182  // Methods needed to support SYCL instrumentation
183 
187  virtual void emitInstrumentationData() = 0;
190  void resolveReleaseDependencies(std::set<Command *> &list);
193  Command *Cmd, void *ObjAddr, bool IsCommand,
194  std::optional<access::mode> AccMode = std::nullopt);
197  sycl::detail::pi::PiEvent &EventAddr);
204  uint64_t makeTraceEventProlog(void *MAddress);
207  void makeTraceEventEpilog();
209  void emitInstrumentation(uint16_t Type, const char *Txt = nullptr);
210 
211  // End Methods needed to support SYCL instrumentation
212 
213  virtual void printDot(std::ostream &Stream) const = 0;
214 
215  virtual const Requirement *getRequirement() const {
216  assert(false && "Internal Error. The command has no stored requirement");
217  return nullptr;
218  }
219 
220  virtual ~Command() { MEvent->cleanDepEventsThroughOneLevel(); }
221 
222  const char *getBlockReason() const;
223 
226  virtual const ContextImplPtr &getWorkerContext() const;
227 
230  const QueueImplPtr &getWorkerQueue() const;
231 
233  virtual bool producesPiEvent() const;
234 
236  virtual bool supportsPostEnqueueCleanup() const;
237 
239  virtual bool readyForCleanup() const;
240 
243  std::vector<sycl::detail::pi::PiEvent>
244  getPiEvents(const std::vector<EventImplPtr> &EventImpls) const;
248  std::vector<sycl::detail::pi::PiEvent>
249  getPiEventsBlocking(const std::vector<EventImplPtr> &EventImpls) const;
250 
251  bool isHostTask() const;
252 
253  bool isFusable() const;
254 
255 protected:
259 
262  std::vector<EventImplPtr> &MPreparedDepsEvents;
263  std::vector<EventImplPtr> &MPreparedHostDepsEvents;
264 
265  void waitForEvents(QueueImplPtr Queue, std::vector<EventImplPtr> &RawEvents,
267 
268  void waitForPreparedHostEvents() const;
269 
281  [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent,
282  const DepDesc &Dep,
283  std::vector<Command *> &ToCleanUp);
284 
286  virtual pi_int32 enqueueImp() = 0;
287 
291  std::mutex MEnqueueMtx;
292 
293  friend class DispatchHostTask;
294 
295 public:
296  const std::vector<EventImplPtr> &getPreparedHostDepsEvents() const {
298  }
299 
300  const std::vector<EventImplPtr> &getPreparedDepsEvents() const {
301  return MPreparedDepsEvents;
302  }
303 
304  // XPTI instrumentation. Copy code location details to the internal struct.
305  // Memory is allocated in this method and released in destructor.
307 
314  MPreparedDepsEvents.clear();
315  MPreparedHostDepsEvents.clear();
316  MDeps.clear();
317  }
318 
320  std::vector<DepDesc> MDeps;
322  std::unordered_set<Command *> MUsers;
324  bool MIsBlockable = false;
326  unsigned MLeafCounter = 0;
327 
328  struct Marks {
330  bool MVisited = false;
332  bool MToBeDeleted = false;
333  };
336 
337  enum class BlockReason : int { HostAccessor = 0, HostTask };
338 
339  // Only have reasonable value while MIsBlockable is true
341 
343  std::atomic<EnqueueResultT::ResultT> MEnqueueStatus;
344 
345  // All member variables defined here are needed for the SYCL instrumentation
346  // layer. Do not guard these variables below with XPTI_ENABLE_INSTRUMENTATION
347  // to ensure we have the same object layout when the macro in the library and
348  // SYCL app are not the same.
349 
351  void *MTraceEvent = nullptr;
355  int32_t MStreamID = -1;
358  void *MAddress = nullptr;
360  std::string MAddressString;
362  std::string MCommandNodeType;
364  std::string MCommandName;
368  bool MFirstInstance = false;
370  uint64_t MInstanceID = 0;
377  std::string MSubmissionFileName;
379 
380  // This flag controls whether a host event should be marked complete after
381  // successful enqueue of the command. An event is considered a host event if
382  // either its is_host() returns true or it has no backend representation
383  // (i.e. getHandleRef() returns a reference to a nullptr value).
384  // By default the flag is set to true because most host operations are
385  // synchronous. The only asynchronous operation currently is host-task.
387 
391  bool MMarkedForCleanup = false;
392 
398  std::vector<EventImplPtr> MBlockedUsers;
399  std::mutex MBlockedUsersMutex;
400 
401 protected:
404  return MCommandBuffer;
405  }
406 
411  std::vector<sycl::detail::pi::PiExtSyncPoint> MSyncPointDeps;
412 };
413 
/// The empty command does nothing during enqueue.
416 class EmptyCommand : public Command {
417 public:
418  EmptyCommand(QueueImplPtr Queue);
419 
420  void printDot(std::ostream &Stream) const final;
     /// Returns the address of the first stored requirement.
     /// NOTE(review): indexes MRequirements[0] without an emptiness check;
     /// presumably addRequirement() is always called first — confirm.
421  const Requirement *getRequirement() const final { return &MRequirements[0]; }
422  void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
423  const Requirement *Req);
424 
     /// Instrumentation method which emits telemetry data.
425  void emitInstrumentationData() override;
426 
     /// Returns true iff the command produces a PI event on non-host devices.
427  bool producesPiEvent() const final;
428 
429 private:
     /// Enqueue implementation hook declared pure virtual in Command.
430  pi_int32 enqueueImp() final;
431 
432  // A deque is used here because push_back/emplace_back do not invalidate
433  // pointers or references to stored items (only iterators), so the address
434  // returned by getRequirement() stays usable.
435  std::deque<Requirement> MRequirements;
436 };
437 
/// The release command enqueues release of a memory object instance allocated
/// on Host or underlying framework.
440 class ReleaseCommand : public Command {
441 public:
442  ReleaseCommand(QueueImplPtr Queue, AllocaCommandBase *AllocaCmd);
443 
444  void printDot(std::ostream &Stream) const final;
     /// Instrumentation method which emits telemetry data.
445  void emitInstrumentationData() override;
     /// Returns true iff the command produces a PI event on non-host devices.
446  bool producesPiEvent() const final;
     /// Returns true iff this command can be freed by post enqueue cleanup.
447  bool supportsPostEnqueueCleanup() const final;
     /// Returns true iff this command is ready to be submitted for cleanup.
448  bool readyForCleanup() const final;
449 
450 private:
     /// Enqueue implementation hook declared pure virtual in Command.
451  pi_int32 enqueueImp() final;
452 
     /// NOTE(review): presumably the alloca command whose memory this command
     /// releases (passed to the constructor as AllocaCmd) — confirm.
454  AllocaCommandBase *MAllocaCmd = nullptr;
455 };
456 
458 class AllocaCommandBase : public Command {
459 public:
461  AllocaCommandBase *LinkedAllocaCmd, bool IsConst);
462 
463  ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; }
464 
465  SYCLMemObjI *getSYCLMemObj() const { return MRequirement.MSYCLMemObj; }
466 
467  virtual void *getMemAllocation() const = 0;
468 
469  const Requirement *getRequirement() const final { return &MRequirement; }
470 
471  void emitInstrumentationData() override;
472 
473  bool producesPiEvent() const final;
474 
475  bool supportsPostEnqueueCleanup() const final;
476 
477  bool readyForCleanup() const final;
478 
479  void *MMemAllocation = nullptr;
480 
486  AllocaCommandBase *MLinkedAllocaCmd = nullptr;
488  bool MIsActive = true;
489 
492  bool MIsLeaderAlloca = true;
493  // Indicates that the data in this allocation must not be modified
494  bool MIsConst = false;
495 
496 protected:
497  Requirement MRequirement;
498  ReleaseCommand MReleaseCmd;
499 };
500 
504 public:
506  bool InitFromUserData = true,
507  AllocaCommandBase *LinkedAllocaCmd = nullptr,
508  bool IsConst = false);
509 
510  void *getMemAllocation() const final { return MMemAllocation; }
511  void printDot(std::ostream &Stream) const final;
512  void emitInstrumentationData() override;
513 
514 private:
515  pi_int32 enqueueImp() final;
516 
519  bool MInitFromUserData = false;
520 };
521 
524 public:
526  AllocaCommandBase *ParentAlloca,
527  std::vector<Command *> &ToEnqueue,
528  std::vector<Command *> &ToCleanUp);
529 
530  void *getMemAllocation() const final;
531  void printDot(std::ostream &Stream) const final;
532  AllocaCommandBase *getParentAlloca() { return MParentAlloca; }
533  void emitInstrumentationData() override;
534 
535 private:
536  pi_int32 enqueueImp() final;
537 
538  AllocaCommandBase *MParentAlloca = nullptr;
539 };
540 
/// The map command enqueues mapping of device memory onto host memory.
542 class MapMemObject : public Command {
543 public:
544  MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, void **DstPtr,
545  QueueImplPtr Queue, access::mode MapMode);
546 
547  void printDot(std::ostream &Stream) const final;
     /// Returns the address of the requirement stored in this command (MSrcReq).
548  const Requirement *getRequirement() const final { return &MSrcReq; }
     /// Instrumentation method which emits telemetry data.
549  void emitInstrumentationData() override;
550 
551 private:
     /// Enqueue implementation hook declared pure virtual in Command.
552  pi_int32 enqueueImp() final;
553 
     // NOTE(review): the members below presumably mirror the constructor
     // arguments of the same names (MDstPtr likely receives the mapped host
     // pointer) — confirm against commands.cpp.
554  AllocaCommandBase *MSrcAllocaCmd = nullptr;
555  Requirement MSrcReq;
556  void **MDstPtr = nullptr;
557  access::mode MMapMode;
558 };
559 
/// The unmap command removes mapping of host memory onto device memory.
561 class UnMapMemObject : public Command {
562 public:
563  UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req,
564  void **SrcPtr, QueueImplPtr Queue);
565 
566  void printDot(std::ostream &Stream) const final;
     /// Returns the address of the requirement stored in this command (MDstReq).
567  const Requirement *getRequirement() const final { return &MDstReq; }
     /// Instrumentation method which emits telemetry data.
568  void emitInstrumentationData() override;
     /// Returns true iff the command produces a PI event on non-host devices.
569  bool producesPiEvent() const final;
570 
571 private:
     /// Enqueue implementation hook declared pure virtual in Command.
572  pi_int32 enqueueImp() final;
573 
     // NOTE(review): the members below presumably mirror the constructor
     // arguments of the same names — confirm against commands.cpp.
574  AllocaCommandBase *MDstAllocaCmd = nullptr;
575  Requirement MDstReq;
576  void **MSrcPtr = nullptr;
577 };
578 
/// The mem copy command enqueues memory copy between two instances of memory
/// object.
581 class MemCpyCommand : public Command {
582 public:
583  MemCpyCommand(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
584  Requirement DstReq, AllocaCommandBase *DstAllocaCmd,
585  QueueImplPtr SrcQueue, QueueImplPtr DstQueue);
586 
587  void printDot(std::ostream &Stream) const final;
     /// Returns the address of the destination requirement (MDstReq).
588  const Requirement *getRequirement() const final { return &MDstReq; }
     /// Instrumentation method which emits telemetry data.
589  void emitInstrumentationData() final;
     /// Get the context of the queue this command will be submitted to.
590  const ContextImplPtr &getWorkerContext() const final;
     /// Returns true iff the command produces a PI event on non-host devices.
591  bool producesPiEvent() const final;
592 
593 private:
     /// Enqueue implementation hook declared pure virtual in Command.
594  pi_int32 enqueueImp() final;
595 
     // Source and destination descriptions of the copy.
     // NOTE(review): presumably initialized from the constructor arguments of
     // the same names; no MDstQueue member exists, so DstQueue is presumably
     // forwarded to the Command base — confirm against commands.cpp.
596  QueueImplPtr MSrcQueue;
597  Requirement MSrcReq;
598  AllocaCommandBase *MSrcAllocaCmd = nullptr;
599  Requirement MDstReq;
600  AllocaCommandBase *MDstAllocaCmd = nullptr;
601 };
602 
/// The mem copy host command enqueues memory copy between two instances of
/// memory object.
605 class MemCpyCommandHost : public Command {
606 public:
607  MemCpyCommandHost(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
608  Requirement DstReq, void **DstPtr, QueueImplPtr SrcQueue,
609  QueueImplPtr DstQueue);
610 
611  void printDot(std::ostream &Stream) const final;
     /// Returns the address of the destination requirement (MDstReq).
612  const Requirement *getRequirement() const final { return &MDstReq; }
     /// Instrumentation method which emits telemetry data.
613  void emitInstrumentationData() final;
     /// Get the context of the queue this command will be submitted to.
614  const ContextImplPtr &getWorkerContext() const final;
615 
616 private:
     /// Enqueue implementation hook declared pure virtual in Command.
617  pi_int32 enqueueImp() final;
618 
     // Unlike MemCpyCommand, the destination is held as a raw pointer-to-pointer
     // (MDstPtr) rather than as an alloca command.
     // NOTE(review): presumably the members are initialized from the
     // constructor arguments of the same names — confirm against commands.cpp.
619  QueueImplPtr MSrcQueue;
620  Requirement MSrcReq;
621  AllocaCommandBase *MSrcAllocaCmd = nullptr;
622  Requirement MDstReq;
623  void **MDstPtr = nullptr;
624 };
625 
// Host-pipe read/write entry point; only the declaration is visible in this
// header.
// NOTE(review): presumably `read` selects the transfer direction, `blocking`
// makes the call wait for completion, and the returned pi_int32 is a PI error
// code — confirm against the definition.
626 pi_int32
627 enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName,
628  bool blocking, void *ptr, size_t size,
629  std::vector<sycl::detail::pi::PiEvent> &RawEvents,
630  const detail::EventImplPtr &OutEventImpl, bool read);
631 
633  const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector<ArgDesc> &Args,
634  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
635  const std::shared_ptr<detail::kernel_impl> &MSyclKernel,
636  const std::string &KernelName,
637  std::vector<sycl::detail::pi::PiEvent> &RawEvents,
638  const detail::EventImplPtr &Event,
639  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
640  sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig,
641  bool KernelIsCooperative);
642 
643 class KernelFusionCommand;
644 
647 class ExecCGCommand : public Command {
648 public:
650  std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
651  sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr,
652  const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
653 
654  std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const;
655 
656  void clearAuxiliaryResources();
657 
658  void printDot(std::ostream &Stream) const final;
659  void emitInstrumentationData() final;
660  std::string_view getTypeString() const;
661 
662  detail::CG &getCG() const { return *MCommandGroup; }
663 
664  // MEmptyCmd is only used if this command refers to a host task.
665  // Looking up the single EmptyCommand amongst the users of the command
666  // representing the host task is unreliable. This unreliability is rooted in
667  // the cleanup process.
668  EmptyCommand *MEmptyCmd = nullptr;
669 
670  // MFusionCmd marks a CG command as part of a kernel fusion and allows
671  // referring back to the corresponding KernelFusionCommand if
672  // necessary.
673  KernelFusionCommand *MFusionCmd = nullptr;
674 
675  bool producesPiEvent() const final;
676 
677  bool supportsPostEnqueueCleanup() const final;
678 
679  bool readyForCleanup() const final;
680 
681 private:
682  pi_int32 enqueueImp() final;
683  pi_int32 enqueueImpCommandBuffer();
684  pi_int32 enqueueImpQueue();
685 
686  AllocaCommandBase *getAllocaForReq(Requirement *Req);
687 
688  std::unique_ptr<detail::CG> MCommandGroup;
689 
690  friend class Command;
691 };
692 
693 // For XPTI instrumentation only.
694 // Free function used to emit data when no node is created in the graph.
695 // Very close to ExecCGCommand::emitInstrumentationData content.
// Declared only when XPTI_ENABLE_INSTRUMENTATION is defined.
// NOTE(review): the returned pair is presumably the created trace event and
// its instance ID — confirm against the definition.
696 #ifdef XPTI_ENABLE_INSTRUMENTATION
697 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
698  int32_t StreamID, const std::shared_ptr<detail::kernel_impl> &SyclKernel,
699  const detail::code_location &CodeLoc, const std::string &SyclKernelName,
700  const QueueImplPtr &Queue, const NDRDescT &NDRDesc,
701  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
702  std::vector<ArgDesc> &CGArgs);
703 #endif
704 
706 public:
708  AllocaCommandBase *SrcAllocaCmd, void **DstPtr);
709 
710  void printDot(std::ostream &Stream) const final;
711  const Requirement *getRequirement() const final { return &MDstReq; }
712  void emitInstrumentationData() final;
713 
714 private:
715  pi_int32 enqueueImp() final;
716 
717  AllocaCommandBase *MSrcAllocaCmd = nullptr;
718  Requirement MDstReq;
719  void **MDstPtr = nullptr;
720 };
721 
/// The KernelFusionCommand is placed in the execution graph together with the
/// individual kernels.
/// NOTE(review): presumably those are the kernels registered through
/// addToFusionList() — confirm.
724 class KernelFusionCommand : public Command {
725 public:
     /// States reported by isActive() / readyForDeletion() and changed through
     /// setFusionStatus().
726  enum class FusionStatus { ACTIVE, CANCELLED, COMPLETE, DELETED };
727 
728  explicit KernelFusionCommand(QueueImplPtr Queue);
729 
730  void printDot(std::ostream &Stream) const final;
     /// Instrumentation method which emits telemetry data.
731  void emitInstrumentationData() final;
     /// Returns true iff the command produces a PI event on non-host devices.
732  bool producesPiEvent() const final;
733 
     /// NOTE(review): presumably returns MAuxiliaryCommands by mutable
     /// reference — confirm against the definition.
734  std::vector<Command *> &auxiliaryCommands();
735 
736  void addToFusionList(ExecCGCommand *Kernel);
737 
     /// NOTE(review): presumably returns MFusionList by mutable reference —
     /// confirm against the definition.
738  std::vector<ExecCGCommand *> &getFusionList();
739 
743  void setFusionStatus(FusionStatus Status);
744 
748  void resetQueue();
749 
     /// True while the stored status is FusionStatus::ACTIVE.
750  bool isActive() const { return MStatus == FusionStatus::ACTIVE; }
751 
     /// True once the stored status is FusionStatus::DELETED.
752  bool readyForDeletion() const { return MStatus == FusionStatus::DELETED; }
753 
754 private:
     /// Enqueue implementation hook declared pure virtual in Command.
755  pi_int32 enqueueImp() final;
756 
757  std::vector<ExecCGCommand *> MFusionList;
758 
759  std::vector<Command *> MAuxiliaryCommands;
760 
     // NOTE(review): no in-class initializer; presumably set by the
     // constructor in commands.cpp — confirm.
761  FusionStatus MStatus;
762 };
763 
765 public:
767  QueueImplPtr Queue,
769  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
770  Nodes);
771 
772  void printDot(std::ostream &Stream) const final;
773  void emitInstrumentationData() final;
774  bool producesPiEvent() const final;
775 
776 private:
777  pi_int32 enqueueImp() final;
778 
780  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
781  MNodes;
782 };
783 
784 // Enqueues a given kernel to a PiExtCommandBuffer
786  context Ctx, DeviceImplPtr DeviceImpl,
788  const CGExecKernel &CommandGroup,
789  std::vector<sycl::detail::pi::PiExtSyncPoint> &SyncPoints,
790  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint,
792  const std::function<void *(Requirement *Req)> &getMemAllocationFunc);
793 
794 // Sets arguments for a given kernel and device based on the argument type.
795 // Refactored from SetKernelParamsAndLaunch to allow it to be used in the graphs
796 // extension.
798  const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel,
799  const std::shared_ptr<device_image_impl> &DeviceImageImpl,
800  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
801  const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg,
802  size_t NextTrueIndex);
803 
805  const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
806  std::function<void(detail::ArgDesc &Arg, int NextTrueIndex)> Func);
807 
809 
810 } // namespace detail
811 } // namespace _V1
812 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
Base class for memory allocation commands.
Definition: commands.hpp:458
const Requirement * getRequirement() const final
Definition: commands.hpp:469
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
Definition: commands.hpp:465
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
Definition: commands.hpp:503
void * getMemAllocation() const final
Definition: commands.hpp:510
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
Definition: commands.hpp:523
AllocaCommandBase * getParentAlloca()
Definition: commands.hpp:532
"Execute kernel" command group class.
Definition: cg.hpp:167
Base class for all types of command groups.
Definition: cg.hpp:53
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:107
bool isSuccessfullyEnqueued() const
Definition: commands.hpp:158
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
Definition: commands.cpp:705
void * MTraceEvent
The event for node_create and task_begin.
Definition: commands.hpp:351
CommandType MType
The type of the command.
Definition: commands.hpp:289
virtual bool producesPiEvent() const
Returns true iff the command produces a PI event on non-host devices.
Definition: commands.cpp:749
void emitEnqueuedEventSignal(sycl::detail::pi::PiEvent &PiEventAddr)
Creates a signal event with the enqueued kernel event handle.
Definition: commands.cpp:798
int32_t MStreamID
The stream under which the traces are emitted.
Definition: commands.hpp:355
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
Definition: commands.hpp:300
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
Definition: commands.cpp:807
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
Definition: commands.hpp:296
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
Definition: commands.cpp:909
std::string MCommandName
Buffer to build the command end-user understandable name.
Definition: commands.hpp:364
sycl::detail::pi::PiExtCommandBuffer getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
Definition: commands.hpp:403
virtual pi_int32 enqueueImp()=0
Private interface. Derived classes should implement this method.
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
Definition: commands.hpp:391
void emitEdgeEventForEventDependence(Command *Cmd, sycl::detail::pi::PiEvent &EventAddr)
Creates an edge event when the dependency is an event.
Definition: commands.cpp:602
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
Definition: commands.hpp:326
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
Definition: commands.cpp:818
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
Definition: commands.hpp:322
std::vector< sycl::detail::pi::PiEvent > getPiEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:232
void waitForPreparedHostEvents() const
Definition: commands.cpp:448
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
Definition: commands.hpp:377
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
Definition: commands.hpp:291
virtual const ContextImplPtr & getWorkerContext() const
Get the context of the queue this command will be submitted to.
Definition: commands.cpp:740
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
Definition: commands.cpp:535
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
Definition: commands.hpp:374
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:692
std::vector< sycl::detail::pi::PiEvent > getPiEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:259
Marks MMarks
Used for marking the node during graph traversal.
Definition: commands.hpp:335
std::vector< EventImplPtr > & MPreparedHostDepsEvents
Definition: commands.hpp:263
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Definition: commands.hpp:262
Command(CommandType Type, QueueImplPtr Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
Definition: commands.cpp:510
std::string MSubmissionFunctionName
Definition: commands.hpp:378
uint64_t MInstanceID
Instance ID tracked for the command.
Definition: commands.hpp:370
const QueueImplPtr & getWorkerQueue() const
Get the queue this command will be submitted to.
Definition: commands.cpp:744
std::vector< DepDesc > MDeps
Contains the list of dependencies (edges).
Definition: commands.hpp:320
const char * getBlockReason() const
Definition: commands.cpp:951
std::vector< sycl::detail::pi::PiExtSyncPoint > MSyncPointDeps
List of sync points for submissions to a command buffer.
Definition: commands.hpp:411
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
Definition: commands.cpp:753
std::vector< EventImplPtr > MBlockedUsers
Contains the list of commands that depend on the host command explicitly (by depends_on).
Definition: commands.hpp:398
void addUser(Command *NewUser)
Definition: commands.hpp:141
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
Definition: commands.hpp:343
void clearAllDependencies()
Clear all dependency events. This should only be used if a command is about to be deleted without bein...
Definition: commands.hpp:313
const EventImplPtr & getEvent() const
Definition: commands.hpp:180
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:660
CommandType getType() const
Definition: commands.hpp:144
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
Definition: commands.hpp:164
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
Definition: commands.hpp:358
std::string MAddressString
Buffer to build the address string.
Definition: commands.hpp:360
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, sycl::detail::pi::PiEvent &Event)
Definition: commands.cpp:453
void addBlockedUserUnique(const EventImplPtr &NewUser)
Definition: commands.hpp:171
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
Definition: commands.hpp:324
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
Definition: commands.cpp:751
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
Definition: commands.hpp:366
sycl::detail::pi::PiExtCommandBuffer MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
Definition: commands.hpp:409
std::string MCommandNodeType
Buffer to build the command node type.
Definition: commands.hpp:362
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
Definition: commands.cpp:758
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
Definition: commands.cpp:551
const QueueImplPtr & getQueue() const
Definition: commands.hpp:178
virtual const Requirement * getRequirement() const
Definition: commands.hpp:215
bool MFirstInstance
Flag to indicate if this is the first time we are seeing this payload.
Definition: commands.hpp:368
The empty command does nothing during enqueue.
Definition: commands.hpp:416
const Requirement * getRequirement() const final
Definition: commands.hpp:421
bool producesPiEvent() const final
Returns true iff the command produces a PI event on non-host devices.
Definition: commands.cpp:1800
void printDot(std::ostream &Stream) const final
Definition: commands.cpp:1782
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
Definition: commands.cpp:1733
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
Definition: commands.cpp:1749
EmptyCommand(QueueImplPtr Queue)
Definition: commands.cpp:1721
The exec CG command enqueues execution of kernel or explicit memory operation.
Definition: commands.hpp:647
detail::CG & getCG() const
Definition: commands.hpp:662
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
Definition: commands.hpp:724
The map command enqueues mapping of device memory onto host memory.
Definition: commands.hpp:542
const Requirement * getRequirement() const final
Definition: commands.hpp:548
The mem copy host command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:605
const Requirement * getRequirement() const final
Definition: commands.hpp:612
The mem copy command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:581
const Requirement * getRequirement() const final
Definition: commands.hpp:588
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
Definition: commands.hpp:440
The unmap command removes mapping of host memory onto device memory.
Definition: commands.hpp:561
const Requirement * getRequirement() const final
Definition: commands.hpp:567
const Requirement * getRequirement() const final
Definition: commands.hpp:711
The class is an impl counterpart of the sycl::kernel_bundle.
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:91
Class representing the implementation of command_graph<executable>.
::pi_event PiEvent
Definition: pi.hpp:143
::pi_kernel_cache_config PiKernelCacheConfig
Definition: pi.hpp:155
::pi_ext_sync_point PiExtSyncPoint
Definition: pi.hpp:156
::pi_ext_command_buffer_command PiExtCommandBufferCommand
Definition: pi.hpp:159
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
Definition: commands.cpp:2261
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
pi_int32 enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
Definition: commands.cpp:2679
std::shared_ptr< detail::stream_impl > StreamImplPtr
Definition: commands.hpp:50
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< device_impl > DeviceImplPtr
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative)
Definition: commands.cpp:2558
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
Definition: commands.cpp:110
auto tie(Ts &...Args)
Definition: tuple.hpp:39
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
void SetArgBasedOnType(const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, size_t NextTrueIndex)
Definition: commands.cpp:2281
pi_int32 enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const CGExecKernel &CommandGroup, std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, sycl::detail::pi::PiExtCommandBufferCommand *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
Definition: commands.cpp:2454
Definition: access.hpp:18
int32_t pi_int32
Definition: pi.h:212
bool MVisited
Used for marking the node as visited during graph traversal.
Definition: commands.hpp:330
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Definition: commands.hpp:332
Dependency between two commands.
Definition: commands.hpp:81
const Requirement * MDepRequirement
Requirement for the dependency.
Definition: commands.hpp:94
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Definition: commands.hpp:86
Command * MDepCommand
The actual dependency command.
Definition: commands.hpp:92
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
Definition: commands.hpp:97
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Definition: commands.hpp:82
Result of command enqueueing.
Definition: commands.hpp:62
ResultT MResult
Indicates the result of enqueueing.
Definition: commands.hpp:73
pi_int32 MErrCode
Error code which is set when enqueueing fails.
Definition: commands.hpp:77
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, pi_int32 ErrCode=PI_SUCCESS)
Definition: commands.hpp:69
Command * MCmd
Pointer to the command which failed to enqueue.
Definition: commands.hpp:75