DPC++ Runtime
Runtime libraries for oneAPI DPC++
commands.hpp
Go to the documentation of this file.
1 //==-------------- commands.hpp - SYCL standard header file ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <atomic>
12 #include <cstdint>
13 #include <deque>
14 #include <memory>
15 #include <optional>
16 #include <set>
17 #include <unordered_set>
18 #include <vector>
19 
20 #include <detail/accessor_impl.hpp>
21 #include <detail/cg.hpp>
22 #include <detail/event_impl.hpp>
24 #include <sycl/access/access.hpp>
25 
26 namespace sycl {
27 inline namespace _V1 {
28 
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
31 class node_impl;
32 } // namespace ext::oneapi::experimental::detail
33 namespace detail {
34 
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
37  xpti_td *TraceEvent, uint16_t Type,
38  const void *Addr);
39 #endif
40 RTDeviceBinaryImage *
42  const std::string &KernelName);
43 
44 class queue_impl;
45 class event_impl;
46 class context_impl;
47 class DispatchHostTask;
48 
49 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
50 using EventImplPtr = std::shared_ptr<detail::event_impl>;
51 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
52 using StreamImplPtr = std::shared_ptr<detail::stream_impl>;
53 
54 class Command;
55 class AllocaCommand;
56 class AllocaCommandBase;
57 class ReleaseCommand;
58 class ExecCGCommand;
59 class EmptyCommand;
60 
62 
65  enum ResultT {
70  };
72  ur_result_t ErrCode = UR_RESULT_SUCCESS)
73  : MResult(Result), MCmd(Cmd), MErrCode(ErrCode) {}
79  ur_result_t MErrCode;
80 };
81 
83 struct DepDesc {
84  DepDesc(Command *DepCommand, const Requirement *Req,
85  AllocaCommandBase *AllocaCmd)
86  : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) {}
87 
88  friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs) {
89  return std::tie(Lhs.MDepRequirement, Lhs.MDepCommand) <
91  }
92 
94  Command *MDepCommand = nullptr;
96  const Requirement *MDepRequirement = nullptr;
100 };
101 
109 class Command {
110 public:
111  enum CommandType {
125  };
126 
127  Command(
128  CommandType Type, QueueImplPtr Queue,
129  ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
130  const std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints = {});
131 
135  [[nodiscard]] Command *addDep(DepDesc NewDep,
136  std::vector<Command *> &ToCleanUp);
137 
141  [[nodiscard]] Command *addDep(EventImplPtr Event,
142  std::vector<Command *> &ToCleanUp);
143 
144  void addUser(Command *NewUser) { MUsers.insert(NewUser); }
145 
147  CommandType getType() const { return MType; }
148 
156  virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
157  std::vector<Command *> &ToCleanUp);
158 
159  bool isFinished();
160 
161  bool isSuccessfullyEnqueued() const {
163  }
164 
165  // Shows that the command could not be enqueued. Currently this may be true
166  // only for an empty task.
167  bool isEnqueueBlocked() const {
169  }
170  // Shows that command could be enqueued, but is blocking enqueue of all
171  // commands depending on it. Regular usage - host task.
172  bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); }
173 
174  void addBlockedUserUnique(const EventImplPtr &NewUser) {
175  if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) !=
176  MBlockedUsers.end())
177  return;
178  MBlockedUsers.push_back(NewUser);
179  }
180 
181  const QueueImplPtr &getQueue() const { return MQueue; }
182 
183  const EventImplPtr &getEvent() const { return MEvent; }
184 
185  // Methods needed to support SYCL instrumentation
186 
190  virtual void emitInstrumentationData() = 0;
193  void resolveReleaseDependencies(std::set<Command *> &list);
196  Command *Cmd, void *ObjAddr, bool IsCommand,
197  std::optional<access::mode> AccMode = std::nullopt);
200  ur_event_handle_t &EventAddr);
202  void emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr);
207  uint64_t makeTraceEventProlog(void *MAddress);
210  void makeTraceEventEpilog();
212  void emitInstrumentation(uint16_t Type, const char *Txt = nullptr);
213 
214  // End Methods needed to support SYCL instrumentation
215 
216  virtual void printDot(std::ostream &Stream) const = 0;
217 
218  virtual const Requirement *getRequirement() const {
219  assert(false && "Internal Error. The command has no stored requirement");
220  return nullptr;
221  }
222 
223  virtual ~Command() { MEvent->cleanDepEventsThroughOneLevel(); }
224 
225  const char *getBlockReason() const;
226 
229  virtual ContextImplPtr getWorkerContext() const;
230 
232  virtual bool producesPiEvent() const;
233 
235  virtual bool supportsPostEnqueueCleanup() const;
236 
238  virtual bool readyForCleanup() const;
239 
242  std::vector<ur_event_handle_t>
243  getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
247  std::vector<ur_event_handle_t>
248  getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls) const;
249 
250  bool isHostTask() const;
251 
252  bool isFusable() const;
253 
254 protected:
258 
261  std::vector<EventImplPtr> &MPreparedDepsEvents;
262  std::vector<EventImplPtr> &MPreparedHostDepsEvents;
263 
264  void waitForEvents(QueueImplPtr Queue, std::vector<EventImplPtr> &RawEvents,
265  ur_event_handle_t &Event);
266 
267  void waitForPreparedHostEvents() const;
268 
280  [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent,
281  const DepDesc &Dep,
282  std::vector<Command *> &ToCleanUp);
283 
285  virtual ur_result_t enqueueImp() = 0;
286 
290  std::mutex MEnqueueMtx;
291 
292  friend class DispatchHostTask;
293 
294 public:
295  const std::vector<EventImplPtr> &getPreparedHostDepsEvents() const {
297  }
298 
299  const std::vector<EventImplPtr> &getPreparedDepsEvents() const {
300  return MPreparedDepsEvents;
301  }
302 
303  // XPTI instrumentation. Copy code location details to the internal struct.
304  // Memory is allocated in this method and released in destructor.
306 
313  MPreparedDepsEvents.clear();
314  MPreparedHostDepsEvents.clear();
315  MDeps.clear();
316  }
317 
319  std::vector<DepDesc> MDeps;
321  std::unordered_set<Command *> MUsers;
323  bool MIsBlockable = false;
325  unsigned MLeafCounter = 0;
326 
327  struct Marks {
329  bool MVisited = false;
331  bool MToBeDeleted = false;
332  };
335 
336  enum class BlockReason : int { HostAccessor = 0, HostTask };
337 
338  // Only have reasonable value while MIsBlockable is true
340 
342  std::atomic<EnqueueResultT::ResultT> MEnqueueStatus;
343 
344  // All member variables defined here are needed for the SYCL instrumentation
345  // layer. Do not guard these variables below with XPTI_ENABLE_INSTRUMENTATION
346  // to ensure we have the same object layout when the macro in the library and
347  // SYCL app are not the same.
348 
350  void *MTraceEvent = nullptr;
354  int32_t MStreamID = -1;
357  void *MAddress = nullptr;
359  std::string MAddressString;
361  std::string MCommandNodeType;
363  std::string MCommandName;
367  bool MFirstInstance = false;
369  uint64_t MInstanceID = 0;
376  std::string MSubmissionFileName;
378 
379  // This flag controls whether the event should be set complete after a
380  // successful enqueue of the command. An event is considered a "host" event
381  // if it has no backend representation (i.e. getHandleRef() returns a
382  // reference to a nullptr value). By default the flag is set to true because
383  // most host operations are synchronous. The only asynchronous operation
384  // currently is the host task.
386 
390  bool MMarkedForCleanup = false;
391 
397  std::vector<EventImplPtr> MBlockedUsers;
398  std::mutex MBlockedUsersMutex;
399 
400 protected:
402  ur_exp_command_buffer_handle_t getCommandBuffer() const {
403  return MCommandBuffer;
404  }
405 
408  ur_exp_command_buffer_handle_t MCommandBuffer;
410  std::vector<ur_exp_command_buffer_sync_point_t> MSyncPointDeps;
411 };
412 
// The empty command does nothing during enqueue.
// It stores its own copies of requirements in MRequirements and exposes the
// first one through getRequirement().
415 class EmptyCommand : public Command {
416 public:
417  EmptyCommand();
418 
  // Overrides the pure virtual Command::printDot.
419  void printDot(std::ostream &Stream) const final;
  // Returns the address of the first stored requirement. The container is
  // indexed unconditionally, so this presumably must only be called after at
  // least one addRequirement() call -- TODO confirm against callers.
420  const Requirement *getRequirement() const final { return &MRequirements[0]; }
421  void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
422  const Requirement *Req);
423 
  // Instrumentation method which emits telemetry data.
424  void emitInstrumentationData() override;
425 
  // Returns true iff the command produces a UR event on non-host devices.
426  bool producesPiEvent() const final;
427 
428 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
429  ur_result_t enqueueImp() final;
430 
431  // A deque is used here because push_back/emplace_back do not invalidate
432  // pointers or references to already stored elements, even though
433  // iterators may be invalidated.
434  std::deque<Requirement> MRequirements;
435 };
436 
// The release command enqueues release of a memory object instance allocated
// on Host or the underlying framework.
439 class ReleaseCommand : public Command {
440 public:
441  ReleaseCommand(QueueImplPtr Queue, AllocaCommandBase *AllocaCmd);
442 
  // Overrides the pure virtual Command::printDot.
443  void printDot(std::ostream &Stream) const final;
  // Instrumentation method which emits telemetry data.
444  void emitInstrumentationData() override;
  // Returns true iff the command produces a UR event on non-host devices.
445  bool producesPiEvent() const final;
  // Returns true iff this command can be freed by post enqueue cleanup.
446  bool supportsPostEnqueueCleanup() const final;
  // Returns true iff this command is ready to be submitted for cleanup.
447  bool readyForCleanup() const final;
448 
449 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
450  ur_result_t enqueueImp() final;
451 
  // Non-owning raw pointer; presumably the allocation command passed to the
  // constructor whose memory this command releases -- TODO confirm in
  // commands.cpp.
453  AllocaCommandBase *MAllocaCmd = nullptr;
454 };
455 
457 class AllocaCommandBase : public Command {
458 public:
460  AllocaCommandBase *LinkedAllocaCmd, bool IsConst);
461 
462  ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; }
463 
464  SYCLMemObjI *getSYCLMemObj() const { return MRequirement.MSYCLMemObj; }
465 
466  virtual void *getMemAllocation() const = 0;
467 
468  const Requirement *getRequirement() const final { return &MRequirement; }
469 
470  void emitInstrumentationData() override;
471 
472  bool producesPiEvent() const final;
473 
474  bool supportsPostEnqueueCleanup() const final;
475 
476  bool readyForCleanup() const final;
477 
478  void *MMemAllocation = nullptr;
479 
485  AllocaCommandBase *MLinkedAllocaCmd = nullptr;
487  bool MIsActive = true;
488 
491  bool MIsLeaderAlloca = true;
492  // Indicates that the data in this allocation must not be modified
493  bool MIsConst = false;
494 
495 protected:
496  Requirement MRequirement;
497  ReleaseCommand MReleaseCmd;
498 };
499 
503 public:
505  bool InitFromUserData = true,
506  AllocaCommandBase *LinkedAllocaCmd = nullptr,
507  bool IsConst = false);
508 
509  void *getMemAllocation() const final { return MMemAllocation; }
510  void printDot(std::ostream &Stream) const final;
511  void emitInstrumentationData() override;
512 
513 private:
514  ur_result_t enqueueImp() final;
515 
518  bool MInitFromUserData = false;
519 };
520 
523 public:
525  AllocaCommandBase *ParentAlloca,
526  std::vector<Command *> &ToEnqueue,
527  std::vector<Command *> &ToCleanUp);
528 
529  void *getMemAllocation() const final;
530  void printDot(std::ostream &Stream) const final;
531  AllocaCommandBase *getParentAlloca() { return MParentAlloca; }
532  void emitInstrumentationData() override;
533 
534 private:
535  ur_result_t enqueueImp() final;
536 
537  AllocaCommandBase *MParentAlloca = nullptr;
538 };
539 
// The map command enqueues mapping of device memory onto host memory.
541 class MapMemObject : public Command {
542 public:
543  MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, void **DstPtr,
544  QueueImplPtr Queue, access::mode MapMode);
545 
  // Overrides the pure virtual Command::printDot.
546  void printDot(std::ostream &Stream) const final;
  // Returns the address of the requirement copy stored in this command.
547  const Requirement *getRequirement() const final { return &MSrcReq; }
  // Instrumentation method which emits telemetry data.
548  void emitInstrumentationData() override;
549 
550 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
551  ur_result_t enqueueImp() final;
552 
553  AllocaCommandBase *MSrcAllocaCmd = nullptr;
554  Requirement MSrcReq;
  // NOTE(review): looks like the location that receives the mapped host
  // pointer -- confirm in commands.cpp.
555  void **MDstPtr = nullptr;
  // No in-class initializer; presumably always set by the constructor from
  // MapMode -- TODO confirm.
556  access::mode MMapMode;
557 };
558 
// The unmap command removes mapping of host memory onto device memory.
560 class UnMapMemObject : public Command {
561 public:
562  UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req,
563  void **SrcPtr, QueueImplPtr Queue);
564 
  // Overrides the pure virtual Command::printDot.
565  void printDot(std::ostream &Stream) const final;
  // Returns the address of the requirement copy stored in this command.
566  const Requirement *getRequirement() const final { return &MDstReq; }
  // Instrumentation method which emits telemetry data.
567  void emitInstrumentationData() override;
  // Returns true iff the command produces a UR event on non-host devices.
568  bool producesPiEvent() const final;
569 
570 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
571  ur_result_t enqueueImp() final;
572 
573  AllocaCommandBase *MDstAllocaCmd = nullptr;
574  Requirement MDstReq;
  // NOTE(review): looks like the location holding the host pointer to be
  // unmapped -- confirm in commands.cpp.
575  void **MSrcPtr = nullptr;
576 };
577 
// The mem copy command enqueues memory copy between two instances of memory
// object.
580 class MemCpyCommand : public Command {
581 public:
582  MemCpyCommand(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
583  Requirement DstReq, AllocaCommandBase *DstAllocaCmd,
584  QueueImplPtr SrcQueue, QueueImplPtr DstQueue);
585 
  // Overrides the pure virtual Command::printDot.
586  void printDot(std::ostream &Stream) const final;
  // Reports the destination requirement (not the source one).
587  const Requirement *getRequirement() const final { return &MDstReq; }
  // Instrumentation method which emits telemetry data.
588  void emitInstrumentationData() final;
  // Overrides Command::getWorkerContext, which is documented as returning the
  // context of the queue this command will be submitted to.
589  ContextImplPtr getWorkerContext() const final;
  // Returns true iff the command produces a UR event on non-host devices.
590  bool producesPiEvent() const final;
591 
592 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
593  ur_result_t enqueueImp() final;
594 
  // Only the source queue is stored here; the destination queue is presumably
  // forwarded to the Command base class -- TODO confirm in commands.cpp.
595  QueueImplPtr MSrcQueue;
596  Requirement MSrcReq;
597  AllocaCommandBase *MSrcAllocaCmd = nullptr;
598  Requirement MDstReq;
599  AllocaCommandBase *MDstAllocaCmd = nullptr;
600 };
601 
// The mem copy host command enqueues memory copy between two instances of
// memory object. Unlike MemCpyCommand, the destination is described by a raw
// void** rather than by an allocation command.
604 class MemCpyCommandHost : public Command {
605 public:
606  MemCpyCommandHost(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
607  Requirement DstReq, void **DstPtr, QueueImplPtr SrcQueue,
608  QueueImplPtr DstQueue);
609 
  // Overrides the pure virtual Command::printDot.
610  void printDot(std::ostream &Stream) const final;
  // Reports the destination requirement (not the source one).
611  const Requirement *getRequirement() const final { return &MDstReq; }
  // Instrumentation method which emits telemetry data.
612  void emitInstrumentationData() final;
  // Overrides Command::getWorkerContext, which is documented as returning the
  // context of the queue this command will be submitted to.
613  ContextImplPtr getWorkerContext() const final;
614 
615 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
616  ur_result_t enqueueImp() final;
617 
  // Only the source queue is stored here; the destination queue is presumably
  // forwarded to the Command base class -- TODO confirm in commands.cpp.
618  QueueImplPtr MSrcQueue;
619  Requirement MSrcReq;
620  AllocaCommandBase *MSrcAllocaCmd = nullptr;
621  Requirement MDstReq;
  // NOTE(review): looks like the location of the host destination pointer --
  // confirm in commands.cpp.
622  void **MDstPtr = nullptr;
623 };
624 
625 ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue,
626  const std::string &PipeName, bool blocking,
627  void *ptr, size_t size,
628  std::vector<ur_event_handle_t> &RawEvents,
629  const detail::EventImplPtr &OutEventImpl,
630  bool read);
631 
632 void enqueueImpKernel(
633  const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector<ArgDesc> &Args,
634  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
635  const std::shared_ptr<detail::kernel_impl> &MSyclKernel,
636  const std::string &KernelName, std::vector<ur_event_handle_t> &RawEvents,
637  const detail::EventImplPtr &Event,
638  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
639  ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative,
640  const bool KernelUsesClusterLaunch,
641  const RTDeviceBinaryImage *BinImage = nullptr);
642 
643 class KernelFusionCommand;
644 
647 class ExecCGCommand : public Command {
648 public:
650  std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
651  bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
652  const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});
653 
654  std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const;
655 
656  void clearAuxiliaryResources();
657 
658  void printDot(std::ostream &Stream) const final;
659  void emitInstrumentationData() final;
660  std::string_view getTypeString() const;
661 
662  detail::CG &getCG() const { return *MCommandGroup; }
663 
664  // MEmptyCmd is only used if this command represents a host task.
665  // Looking up the single EmptyCommand among the users of the command that
666  // represents the host task is unreliable. This unreliability stems from
667  // the cleanup process.
668  EmptyCommand *MEmptyCmd = nullptr;
669 
670  // MFusionCmd marks a CG command as part of a kernel fusion and allows
671  // referring back to the corresponding KernelFusionCommand if
672  // necessary.
673  KernelFusionCommand *MFusionCmd = nullptr;
674 
675  // MEventNeeded is true if the command needs to produce a valid event. The
676  // implementation may elect to not produce events (native or SYCL) if this
677  // is false.
678  bool MEventNeeded = true;
679 
680  bool producesPiEvent() const final;
681 
682  bool supportsPostEnqueueCleanup() const final;
683 
684  bool readyForCleanup() const final;
685 
686 private:
687  ur_result_t enqueueImp() final;
688  ur_result_t enqueueImpCommandBuffer();
689  ur_result_t enqueueImpQueue();
690 
691  AllocaCommandBase *getAllocaForReq(Requirement *Req);
692 
693  std::unique_ptr<detail::CG> MCommandGroup;
694 
695  friend class Command;
696 };
697 
698 // For XPTI instrumentation only.
699 // Used to emit data in cases where no node is created in the graph.
700 // Very similar to the content of ExecCGCommand::emitInstrumentationData.
701 #ifdef XPTI_ENABLE_INSTRUMENTATION
702 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
703  int32_t StreamID, const std::shared_ptr<detail::kernel_impl> &SyclKernel,
704  const detail::code_location &CodeLoc, const std::string &SyclKernelName,
705  const QueueImplPtr &Queue, const NDRDescT &NDRDesc,
706  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
707  std::vector<ArgDesc> &CGArgs);
708 #endif
709 
711 public:
713  AllocaCommandBase *SrcAllocaCmd, void **DstPtr);
714 
715  void printDot(std::ostream &Stream) const final;
716  const Requirement *getRequirement() const final { return &MDstReq; }
717  void emitInstrumentationData() final;
718 
719 private:
720  ur_result_t enqueueImp() final;
721 
722  AllocaCommandBase *MSrcAllocaCmd = nullptr;
723  Requirement MDstReq;
724  void **MDstPtr = nullptr;
725 };
726 
// The KernelFusionCommand is placed in the execution graph together with the
// individual kernels of a fusion. It keeps the list of ExecCGCommand kernels
// added to the fusion, a list of auxiliary commands, and the current status.
729 class KernelFusionCommand : public Command {
730 public:
  // States tracked in MStatus; isActive() tests for ACTIVE and
  // readyForDeletion() tests for DELETED.
731  enum class FusionStatus { ACTIVE, CANCELLED, COMPLETE, DELETED };
732 
733  explicit KernelFusionCommand(QueueImplPtr Queue);
734 
  // Overrides the pure virtual Command::printDot.
735  void printDot(std::ostream &Stream) const final;
  // Instrumentation method which emits telemetry data.
736  void emitInstrumentationData() final;
  // Returns true iff the command produces a UR event on non-host devices.
737  bool producesPiEvent() const final;
738 
  // Returns a mutable reference, so callers can modify the returned vector
  // (presumably MAuxiliaryCommands -- TODO confirm in commands.cpp).
739  std::vector<Command *> &auxiliaryCommands();
740 
741  void addToFusionList(ExecCGCommand *Kernel);
742 
  // Returns a mutable reference, so callers can modify the returned vector
  // (presumably MFusionList -- TODO confirm in commands.cpp).
743  std::vector<ExecCGCommand *> &getFusionList();
744 
748  void setFusionStatus(FusionStatus Status);
749 
753  void resetQueue();
754 
  // True while the status is FusionStatus::ACTIVE.
755  bool isActive() const { return MStatus == FusionStatus::ACTIVE; }
756 
  // True once the status is FusionStatus::DELETED.
757  bool readyForDeletion() const { return MStatus == FusionStatus::DELETED; }
758 
759 private:
  // Private enqueue hook required by Command (pure virtual in the base class).
760  ur_result_t enqueueImp() final;
761 
762  std::vector<ExecCGCommand *> MFusionList;
763 
764  std::vector<Command *> MAuxiliaryCommands;
765 
  // No in-class initializer; presumably set by the constructor -- TODO
  // confirm, since isActive()/readyForDeletion() read it unconditionally.
766  FusionStatus MStatus;
767 };
768 
770 public:
772  QueueImplPtr Queue,
774  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
775  Nodes);
776 
777  void printDot(std::ostream &Stream) const final;
778  void emitInstrumentationData() final;
779  bool producesPiEvent() const final;
780 
781 private:
782  ur_result_t enqueueImp() final;
783 
785  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
786  MNodes;
787 };
788 
789 // Enqueues a given kernel to a ur_exp_command_buffer_handle_t
791  context Ctx, DeviceImplPtr DeviceImpl,
792  ur_exp_command_buffer_handle_t CommandBuffer,
793  const CGExecKernel &CommandGroup,
794  std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints,
795  ur_exp_command_buffer_sync_point_t *OutSyncPoint,
796  ur_exp_command_buffer_command_handle_t *OutCommand,
797  const std::function<void *(Requirement *Req)> &getMemAllocationFunc);
798 
799 // Sets arguments for a given kernel and device based on the argument type.
800 // Refactored from SetKernelParamsAndLaunch to allow it to be used in the graphs
801 // extension.
802 void SetArgBasedOnType(
803  const detail::PluginPtr &Plugin, ur_kernel_handle_t Kernel,
804  const std::shared_ptr<device_image_impl> &DeviceImageImpl,
805  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
806  const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex);
807 
809  const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
810  std::function<void(detail::ArgDesc &Arg, int NextTrueIndex)> Func);
811 
813 
814 } // namespace detail
815 } // namespace _V1
816 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
Base class for memory allocation commands.
Definition: commands.hpp:457
const Requirement * getRequirement() const final
Definition: commands.hpp:468
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
Definition: commands.hpp:464
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
Definition: commands.hpp:502
void * getMemAllocation() const final
Definition: commands.hpp:509
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
Definition: commands.hpp:522
AllocaCommandBase * getParentAlloca()
Definition: commands.hpp:531
"Execute kernel" command group class.
Definition: cg.hpp:246
Base class for all types of command groups.
Definition: cg.hpp:160
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:109
bool isSuccessfullyEnqueued() const
Definition: commands.hpp:161
virtual ur_result_t enqueueImp()=0
Private interface. Derived classes should implement this method.
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform gluing of events from different contexts.
Definition: commands.cpp:752
void * MTraceEvent
The event for node_create and task_begin.
Definition: commands.hpp:350
CommandType MType
The type of the command.
Definition: commands.hpp:288
virtual bool producesPiEvent() const
Returns true iff the command produces a UR event on non-host devices.
Definition: commands.cpp:791
int32_t MStreamID
The stream under which the traces are emitted.
Definition: commands.hpp:354
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
Definition: commands.hpp:299
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
Definition: commands.cpp:849
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
Definition: commands.hpp:295
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
Definition: commands.cpp:952
std::string MCommandName
Buffer to build the command end-user understandable name.
Definition: commands.hpp:363
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
Definition: commands.hpp:390
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
Definition: commands.hpp:325
std::vector< ur_exp_command_buffer_sync_point_t > MSyncPointDeps
List of sync points for submissions to a command buffer.
Definition: commands.hpp:410
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
Definition: commands.cpp:860
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, ur_event_handle_t &Event)
Definition: commands.cpp:500
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
Definition: commands.hpp:321
void waitForPreparedHostEvents() const
Definition: commands.cpp:495
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
Definition: commands.hpp:376
std::vector< ur_event_handle_t > getUrEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:236
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
Definition: commands.hpp:290
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
Definition: commands.cpp:583
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
Definition: commands.hpp:373
void emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr)
Creates a signal event with the enqueued kernel event handle.
Definition: commands.cpp:840
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:739
Marks MMarks
Used for marking the node during graph traversal.
Definition: commands.hpp:334
ur_exp_command_buffer_handle_t getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
Definition: commands.hpp:402
std::vector< EventImplPtr > & MPreparedHostDepsEvents
Definition: commands.hpp:262
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Definition: commands.hpp:261
std::string MSubmissionFunctionName
Definition: commands.hpp:377
uint64_t MInstanceID
Instance ID tracked for the command.
Definition: commands.hpp:369
std::vector< DepDesc > MDeps
Contains list of dependencies (edges).
Definition: commands.hpp:319
const char * getBlockReason() const
Definition: commands.cpp:994
std::vector< ur_event_handle_t > getUrEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:260
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
Definition: commands.cpp:795
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depend on the host command explicitly (by depends_on).
Definition: commands.hpp:397
void addUser(Command *NewUser)
Definition: commands.hpp:144
virtual ContextImplPtr getWorkerContext() const
Get the context of the queue this command will be submitted to.
Definition: commands.cpp:785
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
Definition: commands.hpp:342
void clearAllDependencies()
Clear all dependency events. This should only be used if a command is about to be deleted without bein...
Definition: commands.hpp:312
const EventImplPtr & getEvent() const
Definition: commands.hpp:183
ur_exp_command_buffer_handle_t MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
Definition: commands.hpp:408
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:707
CommandType getType() const
Definition: commands.hpp:147
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
Definition: commands.hpp:167
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
Definition: commands.hpp:357
std::string MAddressString
Buffer to build the address string.
Definition: commands.hpp:359
void addBlockedUserUnique(const EventImplPtr &NewUser)
Definition: commands.hpp:174
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
Definition: commands.hpp:323
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
Definition: commands.cpp:793
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
Definition: commands.hpp:365
std::string MCommandNodeType
Buffer to build the command node type.
Definition: commands.hpp:361
void emitEdgeEventForEventDependence(Command *Cmd, ur_event_handle_t &EventAddr)
Creates an edge event when the dependency is an event.
Definition: commands.cpp:650
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
Definition: commands.cpp:800
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
Definition: commands.cpp:599
const QueueImplPtr & getQueue() const
Definition: commands.hpp:181
Command(CommandType Type, QueueImplPtr Queue, ur_exp_command_buffer_handle_t CommandBuffer=nullptr, const std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
Definition: commands.cpp:557
virtual const Requirement * getRequirement() const
Definition: commands.hpp:218
bool MFirstInstance
Flag to indicate if this is the first time we are seeing this payload.
Definition: commands.hpp:367
The empty command does nothing during enqueue.
Definition: commands.hpp:415
const Requirement * getRequirement() const final
Definition: commands.hpp:420
bool producesPiEvent() const final
Returns true iff the command produces a UR event on non-host devices.
Definition: commands.cpp:1807
void printDot(std::ostream &Stream) const final
Definition: commands.cpp:1789
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
Definition: commands.cpp:1745
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
Definition: commands.cpp:1761
The exec CG command enqueues execution of kernel or explicit memory operation.
Definition: commands.hpp:647
detail::CG & getCG() const
Definition: commands.hpp:662
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
Definition: commands.hpp:729
The map command enqueues mapping of device memory onto host memory.
Definition: commands.hpp:541
const Requirement * getRequirement() const final
Definition: commands.hpp:547
The mem copy host command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:604
const Requirement * getRequirement() const final
Definition: commands.hpp:611
The mem copy command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:580
const Requirement * getRequirement() const final
Definition: commands.hpp:587
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
Definition: commands.hpp:439
The unmap command removes mapping of host memory onto device memory.
Definition: commands.hpp:560
const Requirement * getRequirement() const final
Definition: commands.hpp:566
const Requirement * getRequirement() const final
Definition: commands.hpp:716
The class is an impl counterpart of the sycl::kernel_bundle.
Class representing the implementation of command_graph<executable>.
RTDeviceBinaryImage * retrieveAMDGCNOrNVPTXKernelBinary(const DeviceImplPtr DeviceImpl, const std::string &KernelName)
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
Definition: commands.cpp:2259
void enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage)
Definition: commands.cpp:2553
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
void SetArgBasedOnType(const PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex)
Definition: commands.cpp:2279
std::shared_ptr< detail::stream_impl > StreamImplPtr
Definition: commands.hpp:52
std::shared_ptr< event_impl > EventImplPtr
Definition: handler.hpp:183
std::shared_ptr< plugin > PluginPtr
Definition: ur.hpp:60
ur_result_t enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
Definition: commands.cpp:2451
std::shared_ptr< device_impl > DeviceImplPtr
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
Definition: commands.cpp:123
ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
Definition: commands.cpp:2670
auto tie(Ts &...Args)
Definition: tuple.hpp:39
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: helpers.hpp:45
Definition: access.hpp:18
bool MVisited
Used for marking the node as visited during graph traversal.
Definition: commands.hpp:329
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Definition: commands.hpp:331
Dependency between two commands.
Definition: commands.hpp:83
const Requirement * MDepRequirement
Requirement for the dependency.
Definition: commands.hpp:96
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Definition: commands.hpp:88
Command * MDepCommand
The actual dependency command.
Definition: commands.hpp:94
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
Definition: commands.hpp:99
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Definition: commands.hpp:84
Result of command enqueueing.
Definition: commands.hpp:64
ResultT MResult
Indicates the result of enqueueing.
Definition: commands.hpp:75
ur_result_t MErrCode
Error code which is set when enqueueing fails.
Definition: commands.hpp:79
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, ur_result_t ErrCode=UR_RESULT_SUCCESS)
Definition: commands.hpp:71
Command * MCmd
Pointer to the command which failed to enqueue.
Definition: commands.hpp:77