DPC++ Runtime
Runtime libraries for oneAPI DPC++
commands.hpp
Go to the documentation of this file.
1 //==-------------- commands.hpp - SYCL standard header file ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <atomic>
12 #include <cstdint>
13 #include <deque>
14 #include <memory>
15 #include <optional>
16 #include <set>
17 #include <unordered_set>
18 #include <vector>
19 
20 #include <detail/accessor_impl.hpp>
21 #include <detail/cg.hpp>
22 #include <detail/event_impl.hpp>
24 #include <sycl/access/access.hpp>
25 
26 namespace sycl {
27 inline namespace _V1 {
28 
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
31 class node_impl;
32 } // namespace ext::oneapi::experimental::detail
33 namespace detail {
34 
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
/// Declared only when XPTI_ENABLE_INSTRUMENTATION is defined (see the
/// surrounding #ifdef/#endif).
/// NOTE(review): presumably emits a trace event of kind \p Type for
/// \p TraceEvent on stream \p StreamID — the definition is not visible here,
/// confirm against commands.cpp.
36 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
37  xpti_td *TraceEvent, uint16_t Type,
38  const void *Addr);
39 #endif
// NOTE(review): listing line 41 (function name and first parameter) is missing
// from this extract. The cross-reference index at the end of the page gives the
// full declaration as:
//   RTDeviceBinaryImage *retrieveAMDGCNOrNVPTXKernelBinary(
//       const DeviceImplPtr DeviceImpl, const std::string &KernelName);
40 RTDeviceBinaryImage *
42  const std::string &KernelName);
43 
44 class queue_impl;
45 class event_impl;
46 class context_impl;
47 class DispatchHostTask;
48 
49 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
50 using EventImplPtr = std::shared_ptr<detail::event_impl>;
51 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
52 using StreamImplPtr = std::shared_ptr<detail::stream_impl>;
53 
54 class Command;
55 class AllocaCommand;
56 class AllocaCommandBase;
57 class ReleaseCommand;
58 class ExecCGCommand;
59 class EmptyCommand;
60 
62 
/// Result of command enqueueing.
// NOTE(review): this is a fragment. Per the cross-reference index the struct is
// declared at listing line 64 as EnqueueResultT; its header, the ResultT
// enumerators (66-69), the first line of the constructor (71) and the members
// MResult (75, "Indicates the result of enqueueing.") and MCmd (77, "Pointer to
// the command which failed to enqueue.") are missing from this extract. The
// index gives the constructor as:
//   EnqueueResultT(ResultT Result = SyclEnqueueSuccess, Command *Cmd = nullptr,
//                  ur_result_t ErrCode = UR_RESULT_SUCCESS)
65  enum ResultT {
70  };
72  ur_result_t ErrCode = UR_RESULT_SUCCESS)
73  : MResult(Result), MCmd(Cmd), MErrCode(ErrCode) {}
 /// Error code which is set when enqueueing fails.
79  ur_result_t MErrCode;
80 };
81 
/// Dependency between two commands.
// NOTE(review): listing lines 90 (right-hand operand of the comparison in
// operator<) and 98-99 are missing from this extract. Per the cross-reference
// index, line 99 declares `AllocaCommandBase *MAllocaCmd` ("Allocation command
// for the memory object we have requirement for."), which the constructor
// below initializes.
83 struct DepDesc {
 /// Stores the three pointers as given; no ownership is taken in this code.
84  DepDesc(Command *DepCommand, const Requirement *Req,
85  AllocaCommandBase *AllocaCmd)
86  : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) {}
87 
 /// Ordering; the left operand is the tuple (MDepRequirement, MDepCommand).
 /// The right operand is on the missing listing line 90.
88  friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs) {
89  return std::tie(Lhs.MDepRequirement, Lhs.MDepCommand) <
91  }
92 
 /// The actual dependency command.
94  Command *MDepCommand = nullptr;
 /// Requirement for the dependency.
96  const Requirement *MDepRequirement = nullptr;
100 };
101 
/// The Command class represents some action that needs to be performed on one
/// or more memory objects.
// NOTE(review): this listing is an extract with gaps in its embedded line
// numbering (e.g. 112-123, 161, 167, 194, 198, 254-259, 287, 295, 304, 322,
// 327, 353, 359, 363, 371). The dropped lines are not reconstructed here. Per
// the cross-reference index they include at least: MType (287, "The type of the
// command."), emitInstrumentationDataProxy ("Proxy method which calls
// emitInstrumentationData."), MMarks (322), MTraceEventPrologComplete (353),
// MSubmissionCodeLocation (359) and MSubmissionFunctionName (363).
109 class Command {
110 public:
 // Enumerators (listing lines 112-123) are missing from this extract.
111  enum CommandType {
124  };
125 
126  Command(
127  CommandType Type, QueueImplPtr Queue,
128  ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
129  const std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints = {});
130 
 // The result is [[nodiscard]]: callers must not ignore the returned Command *.
134  [[nodiscard]] Command *addDep(DepDesc NewDep,
135  std::vector<Command *> &ToCleanUp);
136 
 // Overload taking an event instead of a DepDesc; result is [[nodiscard]] too.
140  [[nodiscard]] Command *addDep(EventImplPtr Event,
141  std::vector<Command *> &ToCleanUp);
142 
 /// Inserts \p NewUser into MUsers (a set, so duplicates collapse).
143  void addUser(Command *NewUser) { MUsers.insert(NewUser); }
144 
 /// Returns MType.
146  CommandType getType() const { return MType; }
147 
 /// Checks if the command is enqueued, and calls enqueueImp.
155  virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
156  std::vector<Command *> &ToCleanUp);
157 
158  bool isFinished();
159 
 // Body (listing line 161) is missing from this extract.
160  bool isSuccessfullyEnqueued() const {
162  }
163 
164  // Shows that command could not be enqueued, now it may be true for empty task
165  // only
 // Body (listing line 167) is missing from this extract.
166  bool isEnqueueBlocked() const {
168  }
169  // Shows that command could be enqueued, but is blocking enqueue of all
170  // commands depending on it. Regular usage - host task.
171  bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); }
172 
 /// Appends \p NewUser to MBlockedUsers unless an equal pointer is already
 /// stored (linear search).
 // NOTE(review): this method does not lock MBlockedUsersMutex itself —
 // presumably the caller holds it; confirm at the call sites.
173  void addBlockedUserUnique(const EventImplPtr &NewUser) {
174  if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) !=
175  MBlockedUsers.end())
176  return;
177  MBlockedUsers.push_back(NewUser);
178  }
179 
180  const QueueImplPtr &getQueue() const { return MQueue; }
181 
182  const EventImplPtr &getEvent() const { return MEvent; }
183 
184  // Methods needed to support SYCL instrumentation
185 
 /// Instrumentation method which emits telemetry data.
189  virtual void emitInstrumentationData() = 0;
 /// Looks at all the dependencies for the release command and enables
 /// instrumentation to report them.
192  void resolveReleaseDependencies(std::set<Command *> &list);
 /// Creates an edge event when the dependency is a command.
 // First line of this declaration (listing line 194, per the index:
 // `void emitEdgeEventForCommandDependence(`) is missing from this extract.
195  Command *Cmd, void *ObjAddr, bool IsCommand,
196  std::optional<access::mode> AccMode = std::nullopt);
 /// Creates an edge event when the dependency is an event.
 // First line of this declaration (listing line 198, per the index:
 // `void emitEdgeEventForEventDependence(Command *Cmd,`) is missing here.
199  ur_event_handle_t &EventAddr);
 /// Creates a signal event with the enqueued kernel event handle.
201  void emitEnqueuedEventSignal(const ur_event_handle_t UrEventAddr);
 /// Create a trace event of node_create type; this must be guarded by a
 /// check for xptiTraceEnabled().
206  uint64_t makeTraceEventProlog(void *MAddress);
 /// If prolog has been run, run epilog; this must be guarded by a check for
 /// xptiTraceEnabled().
209  void makeTraceEventEpilog();
 /// Emits an event of Type.
211  void emitInstrumentation(uint16_t Type, const char *Txt = nullptr);
212 
213  // End Methods needed to support SYCL instrumentation
214 
215  virtual void printDot(std::ostream &Stream) const = 0;
216 
 /// Default implementation: asserts (in builds where assert is active) and
 /// returns nullptr. Derived commands that store a requirement override this.
217  virtual const Requirement *getRequirement() const {
218  assert(false && "Internal Error. The command has no stored requirement");
219  return nullptr;
220  }
221 
 // Dereferences MEvent unconditionally in the destructor.
222  virtual ~Command() { MEvent->cleanDepEventsThroughOneLevel(); }
223 
224  const char *getBlockReason() const;
225 
 /// Get the context of the queue this command will be submitted to.
228  virtual ContextImplPtr getWorkerContext() const;
229 
 /// Returns true iff the command produces a UR event on non-host devices.
231  virtual bool producesPiEvent() const;
232 
 /// Returns true iff this command can be freed by post enqueue cleanup.
234  virtual bool supportsPostEnqueueCleanup() const;
235 
 /// Returns true iff this command is ready to be submitted for cleanup.
237  virtual bool readyForCleanup() const;
238 
 /// Collect UR events from EventImpls and filter out some of them in case of
 /// in order queue.
241  std::vector<ur_event_handle_t>
242  getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
 /// Collect UR events from EventImpls and filter out some of them in case of
 /// in order queue.
 // NOTE(review): the index gives this the same brief as getUrEvents; how the
 // "Blocking" variant differs is not visible here — confirm in commands.cpp.
246  std::vector<ur_event_handle_t>
247  getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls) const;
248 
249  bool isHostTask() const;
250 
251  bool isFusable() const;
252 
253 protected:
257 
 /// Dependency events prepared for waiting by backend.
 // Both members are references, so they must be bound in the constructor's
 // initializer list; the index notes they refer to event_impl class members.
260  std::vector<EventImplPtr> &MPreparedDepsEvents;
261  std::vector<EventImplPtr> &MPreparedHostDepsEvents;
262 
263  void waitForEvents(QueueImplPtr Queue, std::vector<EventImplPtr> &RawEvents,
264  ur_event_handle_t &Event);
265 
266  void waitForPreparedHostEvents() const;
267 
 /// Perform glueing of events from different contexts.
279  [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent,
280  const DepDesc &Dep,
281  std::vector<Command *> &ToCleanUp);
282 
 /// Private interface. Derived classes should implement this method.
284  virtual ur_result_t enqueueImp() = 0;
285 
 /// Mutex used to protect enqueueing from race conditions.
289  std::mutex MEnqueueMtx;
290 
291  friend class DispatchHostTask;
292 
293 public:
 // Body (listing line 295) is missing from this extract.
294  const std::vector<EventImplPtr> &getPreparedHostDepsEvents() const {
296  }
297 
 /// Returns the MPreparedDepsEvents reference member.
298  const std::vector<EventImplPtr> &getPreparedDepsEvents() const {
299  return MPreparedDepsEvents;
300  }
301 
302  // XPTI instrumentation. Copy code location details to the internal struct.
303  // Memory is allocated in this method and released in destructor.
 // The declaration this comment describes (listing line 304) is missing here.
305 
 /// Contains list of dependencies (edges).
307  std::vector<DepDesc> MDeps;
 /// Contains list of commands that depend on the command.
309  std::unordered_set<Command *> MUsers;
 /// Indicates whether the command can be blocked from enqueueing.
311  bool MIsBlockable = false;
 /// Counts the number of memory objects this command is a leaf for.
313  unsigned MLeafCounter = 0;
314 
315  struct Marks {
 /// Used for marking the node as visited during graph traversal.
317  bool MVisited = false;
 /// Used for marking the node for deletion during cleanup.
319  bool MToBeDeleted = false;
320  };
323 
324  enum class BlockReason : int { Unset = -1, HostAccessor = 0, HostTask };
325 
326  // Only have reasonable value while MIsBlockable is true
 // The member this comment describes (listing line 327) is missing here.
328 
 /// Describes the status of the command.
330  std::atomic<EnqueueResultT::ResultT> MEnqueueStatus;
331 
332  // All member variables defined here are needed for the SYCL instrumentation
333  // layer. Do not guard these variables below with XPTI_ENABLE_INSTRUMENTATION
334  // to ensure we have the same object layout when the macro in the library and
335  // SYCL app are not the same.
336 
 /// The event for node_create and task_begin.
338  void *MTraceEvent = nullptr;
 /// The stream under which the traces are emitted.
342  int32_t MStreamID = -1;
 /// Reserved for storing the object address such as SPIR-V or memory object
 /// address.
345  void *MAddress = nullptr;
 /// Buffer to build the address string.
347  std::string MAddressString;
 /// Buffer to build the command node type.
349  std::string MCommandNodeType;
 /// Buffer to build the command end-user understandable name.
351  std::string MCommandName;
 /// Instance ID tracked for the command.
355  uint64_t MInstanceID = 0;
 /// Introduces string to handle memory management since code_location struct
 /// works with raw char arrays.
362  std::string MSubmissionFileName;
364 
365  // This flag allows to control whether event should be set complete
366  // after successful enqueue of command. Event is considered as "host" event
367  // if there is no backend representation of event (i.e. getHandleRef() return
368  // reference to nullptr value). By default the flag is set to true due to most
369  // of host operations are synchronous. The only asynchronous operation
370  // currently is host-task.
 // The flag this comment describes (listing line 371) is missing here.
372 
 /// Indicates that the node will be freed by graph cleanup.
376  bool MMarkedForCleanup = false;
377 
 /// Contains list of commands that depend on the host command explicitly (by
 /// depends_on).
383  std::vector<EventImplPtr> MBlockedUsers;
384  std::mutex MBlockedUsersMutex;
385 
386 protected:
 /// Gets the command buffer (if any) associated with this command.
388  ur_exp_command_buffer_handle_t getCommandBuffer() const {
389  return MCommandBuffer;
390  }
391 
 /// CommandBuffer which will be used to submit to instead of the queue, if
 /// set.
394  ur_exp_command_buffer_handle_t MCommandBuffer;
 /// List of sync points for submissions to a command buffer.
396  std::vector<ur_exp_command_buffer_sync_point_t> MSyncPointDeps;
397 };
398 
/// The empty command does nothing during enqueue.
401 class EmptyCommand : public Command {
402 public:
403  EmptyCommand();
404 
405  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the first stored requirement.
 // NOTE(review): indexes MRequirements[0] without an emptiness check —
 // presumably only called after addRequirement(); confirm at the call sites.
406  const Requirement *getRequirement() const final { return &MRequirements[0]; }
407  void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
408  const Requirement *Req);
409 
 /// Instrumentation method which emits telemetry data.
410  void emitInstrumentationData() override;
411 
 /// Returns true iff the command produces a UR event on non-host devices.
412  bool producesPiEvent() const final;
413 
414 private:
415  ur_result_t enqueueImp() final;
416 
417  // Employing deque here as it allows to push_back/emplace_back without
418  // invalidation of pointer or reference to stored data item regardless of
419  // iterator invalidation.
420  std::deque<Requirement> MRequirements;
421 };
422 
/// The release command enqueues release of a memory object instance allocated
/// on Host or underlying framework.
425 class ReleaseCommand : public Command {
426 public:
427  ReleaseCommand(QueueImplPtr Queue, AllocaCommandBase *AllocaCmd);
428 
429  void printDot(std::ostream &Stream) const final;
430  void emitInstrumentationData() override;
 // The three overrides below are `final`: further-derived classes cannot
 // change them.
431  bool producesPiEvent() const final;
432  bool supportsPostEnqueueCleanup() const final;
433  bool readyForCleanup() const final;
434 
435 private:
436  ur_result_t enqueueImp() final;
437 
 // NOTE(review): presumably the alloca command whose memory this command
 // releases (set from the constructor argument) — the constructor body is not
 // visible here; confirm in commands.cpp.
439  AllocaCommandBase *MAllocaCmd = nullptr;
440 };
441 
/// Base class for memory allocation commands.
// NOTE(review): the first line of the constructor declaration (listing line
// 445) and the comments on listing lines 466-470, 472 and 475-476 are missing
// from this extract.
443 class AllocaCommandBase : public Command {
444 public:
446  AllocaCommandBase *LinkedAllocaCmd, bool IsConst);
447 
 /// Returns a pointer to the ReleaseCommand embedded in this object
 /// (MReleaseCmd), so the pointer is valid only while this command lives.
448  ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; }
449 
 /// Returns the memory object recorded in the stored requirement.
450  SYCLMemObjI *getSYCLMemObj() const { return MRequirement.MSYCLMemObj; }
451 
452  virtual void *getMemAllocation() const = 0;
453 
 /// Returns the address of the stored requirement (MRequirement).
454  const Requirement *getRequirement() const final { return &MRequirement; }
455 
456  void emitInstrumentationData() override;
457 
458  bool producesPiEvent() const final;
459 
460  bool supportsPostEnqueueCleanup() const final;
461 
462  bool readyForCleanup() const final;
463 
464  void *MMemAllocation = nullptr;
465 
471  AllocaCommandBase *MLinkedAllocaCmd = nullptr;
473  bool MIsActive = true;
474 
477  bool MIsLeaderAlloca = true;
478  // Indicates that the data in this allocation must not be modified
479  bool MIsConst = false;
480 
481 protected:
 // MRequirement is declared before MReleaseCmd, so it is constructed first.
482  Requirement MRequirement;
483  ReleaseCommand MReleaseCmd;
484 };
485 
/// The alloca command enqueues allocation of instance of memory object on Host
/// or underlying framework.
// NOTE(review): the class header (listing line 488 per the cross-reference
// index) and the first line of the constructor declaration (490) are missing
// from this extract.
489 public:
491  bool InitFromUserData = true,
492  AllocaCommandBase *LinkedAllocaCmd = nullptr,
493  bool IsConst = false);
494 
 /// Returns MMemAllocation as currently stored.
495  void *getMemAllocation() const final { return MMemAllocation; }
496  void printDot(std::ostream &Stream) const final;
497  void emitInstrumentationData() override;
498 
499 private:
500  ur_result_t enqueueImp() final;
501 
 // In-class default is false, while the constructor parameter of the same
 // meaning defaults to true; the constructor body is not visible here.
504  bool MInitFromUserData = false;
505 };
506 
/// The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
// NOTE(review): the class header (listing line 508 per the cross-reference
// index) and the first line of the constructor declaration (510) are missing
// from this extract.
509 public:
511  AllocaCommandBase *ParentAlloca,
512  std::vector<Command *> &ToEnqueue,
513  std::vector<Command *> &ToCleanUp);
514 
515  void *getMemAllocation() const final;
516  void printDot(std::ostream &Stream) const final;
 /// Returns MParentAlloca.
517  AllocaCommandBase *getParentAlloca() { return MParentAlloca; }
518  void emitInstrumentationData() override;
519 
520 private:
521  ur_result_t enqueueImp() final;
522 
523  AllocaCommandBase *MParentAlloca = nullptr;
524 };
525 
/// The map command enqueues mapping of device memory onto host memory.
527 class MapMemObject : public Command {
528 public:
529  MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, void **DstPtr,
530  QueueImplPtr Queue, access::mode MapMode);
531 
532  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the stored source requirement (MSrcReq).
533  const Requirement *getRequirement() const final { return &MSrcReq; }
534  void emitInstrumentationData() override;
535 
536 private:
537  ur_result_t enqueueImp() final;
538 
539  AllocaCommandBase *MSrcAllocaCmd = nullptr;
 // Stored by value: the constructor takes Requirement by value as well.
540  Requirement MSrcReq;
541  void **MDstPtr = nullptr;
 // No in-class initializer; it must be set by the constructor.
542  access::mode MMapMode;
543 };
544 
/// The unmap command removes mapping of host memory onto device memory.
546 class UnMapMemObject : public Command {
547 public:
548  UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req,
549  void **SrcPtr, QueueImplPtr Queue);
550 
551  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the stored destination requirement (MDstReq).
552  const Requirement *getRequirement() const final { return &MDstReq; }
553  void emitInstrumentationData() override;
 /// Returns true iff the command produces a UR event on non-host devices.
554  bool producesPiEvent() const final;
555 
556 private:
557  ur_result_t enqueueImp() final;
558 
559  AllocaCommandBase *MDstAllocaCmd = nullptr;
 // Stored by value: the constructor takes Requirement by value as well.
560  Requirement MDstReq;
561  void **MSrcPtr = nullptr;
562 };
563 
/// The mem copy command enqueues memory copy between two instances of memory
/// object.
566 class MemCpyCommand : public Command {
567 public:
568  MemCpyCommand(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
569  Requirement DstReq, AllocaCommandBase *DstAllocaCmd,
570  QueueImplPtr SrcQueue, QueueImplPtr DstQueue);
571 
572  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the stored destination requirement (MDstReq).
573  const Requirement *getRequirement() const final { return &MDstReq; }
574  void emitInstrumentationData() final;
 /// Get the context of the queue this command will be submitted to.
575  ContextImplPtr getWorkerContext() const final;
 /// Returns true iff the command produces a UR event on non-host devices.
576  bool producesPiEvent() const final;
577 
578 private:
579  ur_result_t enqueueImp() final;
580 
 // Only the source queue is stored here; DstQueue is a constructor argument
 // with no matching member in this class.
 // NOTE(review): presumably it is forwarded to the Command base — confirm.
581  QueueImplPtr MSrcQueue;
582  Requirement MSrcReq;
583  AllocaCommandBase *MSrcAllocaCmd = nullptr;
584  Requirement MDstReq;
585  AllocaCommandBase *MDstAllocaCmd = nullptr;
586 };
587 
/// The mem copy host command enqueues memory copy between two instances of
/// memory object.
590 class MemCpyCommandHost : public Command {
591 public:
592  MemCpyCommandHost(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd,
593  Requirement DstReq, void **DstPtr, QueueImplPtr SrcQueue,
594  QueueImplPtr DstQueue);
595 
596  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the stored destination requirement (MDstReq).
597  const Requirement *getRequirement() const final { return &MDstReq; }
598  void emitInstrumentationData() final;
 /// Get the context of the queue this command will be submitted to.
599  ContextImplPtr getWorkerContext() const final;
600 
601 private:
602  ur_result_t enqueueImp() final;
603 
 // Unlike MemCpyCommand, the destination is a raw pointer-to-pointer (MDstPtr)
 // rather than an alloca command.
604  QueueImplPtr MSrcQueue;
605  Requirement MSrcReq;
606  AllocaCommandBase *MSrcAllocaCmd = nullptr;
607  Requirement MDstReq;
608  void **MDstPtr = nullptr;
609 };
610 
/// Declaration only; the cross-reference index places the definition in
/// commands.cpp.
/// NOTE(review): presumably \p read selects the transfer direction (read vs.
/// write) and \p blocking whether the call waits for completion — neither is
/// visible from this declaration; confirm against the definition.
/// \return a ur_result_t status code.
611 ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue,
612  const std::string &PipeName, bool blocking,
613  void *ptr, size_t size,
614  std::vector<ur_event_handle_t> &RawEvents,
615  const detail::EventImplPtr &OutEventImpl,
616  bool read);
617 
/// Declaration only; the cross-reference index places the definition in
/// commands.cpp.
/// Note that the definition shown in the index names the event parameter
/// `OutEventImpl` (it is `Event` here) and declares `KernelIsCooperative` as
/// `const bool`; both differences are cosmetic for callers.
/// \p BinImage is optional and defaults to nullptr.
618 void enqueueImpKernel(
619  const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector<ArgDesc> &Args,
620  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
621  const std::shared_ptr<detail::kernel_impl> &MSyclKernel,
622  const std::string &KernelName, std::vector<ur_event_handle_t> &RawEvents,
623  const detail::EventImplPtr &Event,
624  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
625  ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative,
626  const bool KernelUsesClusterLaunch,
627  const RTDeviceBinaryImage *BinImage = nullptr);
628 
/// The exec CG command enqueues execution of kernel or explicit memory
/// operation.
// NOTE(review): the first line of the constructor declaration (listing line
// 633) is missing from this extract.
631 class ExecCGCommand : public Command {
632 public:
 // Takes ownership of the command group (passed as std::unique_ptr).
634  std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
635  bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
636  const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});
637 
638  std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const;
639 
640  void clearAuxiliaryResources();
641 
642  void printDot(std::ostream &Stream) const final;
643  void emitInstrumentationData() final;
644  std::string_view getTypeString() const;
645 
 /// Returns a reference to the owned command group; dereferences
 /// MCommandGroup without a null check.
646  detail::CG &getCG() const { return *MCommandGroup; }
647 
648  // MEmptyCmd is only employed if this command refers to host-task.
649  // The mechanism of lookup for single EmptyCommand amongst users of
650  // host-task-representing command is unreliable. This unreliability roots in
651  // the cleanup process.
652  EmptyCommand *MEmptyCmd = nullptr;
653 
654  // MEventNeeded is true if the command needs to produce a valid event. The
655  // implementation may elect to not produce events (native or SYCL) if this
656  // is false.
657  bool MEventNeeded = true;
658 
659  bool producesPiEvent() const final;
660 
661  bool supportsPostEnqueueCleanup() const final;
662 
663  bool readyForCleanup() const final;
664 
665 private:
666  ur_result_t enqueueImp() final;
 // NOTE(review): presumably enqueueImp() dispatches to one of the two helpers
 // below depending on whether a command buffer is set — confirm in
 // commands.cpp.
667  ur_result_t enqueueImpCommandBuffer();
668  ur_result_t enqueueImpQueue();
669 
670  AllocaCommandBase *getAllocaForReq(Requirement *Req);
671 
672  std::unique_ptr<detail::CG> MCommandGroup;
673 
 // Grants the base class access to this class's private members.
674  friend class Command;
675 };
676 
677 // For XPTI instrumentation only.
678 // Emits instrumentation data in cases where no node is created in the graph.
679 // Very close to ExecCGCommand::emitInstrumentationData content.
// Declared only when XPTI_ENABLE_INSTRUMENTATION is defined.
// NOTE(review): the returned pair is presumably the created trace event and
// its instance ID — confirm against the definition.
680 #ifdef XPTI_ENABLE_INSTRUMENTATION
681 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
682  int32_t StreamID, const std::shared_ptr<detail::kernel_impl> &SyclKernel,
683  const detail::code_location &CodeLoc, const std::string &SyclKernelName,
684  const QueueImplPtr &Queue, const NDRDescT &NDRDesc,
685  const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
686  std::vector<ArgDesc> &CGArgs);
687 #endif
688 
// NOTE(review): the class header (listing line 689) and the first line of the
// constructor declaration (691) are missing from this extract, and the
// cross-reference index does not name this class. Presumably this is
// UpdateHostRequirementCommand, derived from Command — confirm against the
// original header.
690 public:
692  AllocaCommandBase *SrcAllocaCmd, void **DstPtr);
693 
694  void printDot(std::ostream &Stream) const final;
 /// Returns the address of the stored destination requirement (MDstReq).
695  const Requirement *getRequirement() const final { return &MDstReq; }
696  void emitInstrumentationData() final;
697 
698 private:
699  ur_result_t enqueueImp() final;
700 
701  AllocaCommandBase *MSrcAllocaCmd = nullptr;
702  Requirement MDstReq;
703  void **MDstPtr = nullptr;
704 };
705 
// NOTE(review): the class header (listing line 706), the first line of the
// constructor declaration (708) and one constructor parameter line (710) are
// missing from this extract, and the cross-reference index does not name this
// class. Presumably this is UpdateCommandBufferCommand, derived from Command —
// confirm against the original header.
707 public:
709  QueueImplPtr Queue,
711  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
712  Nodes);
713 
714  void printDot(std::ostream &Stream) const final;
715  void emitInstrumentationData() final;
716  bool producesPiEvent() const final;
717 
718 private:
719  ur_result_t enqueueImp() final;
720 
 // Shared ownership of the graph nodes handed to the constructor (the
 // parameter has the same vector-of-shared_ptr type).
722  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
723  MNodes;
724 };
725 
726 // Enqueues a given kernel to a ur_exp_command_buffer_handle_t
// NOTE(review): the first line of this declaration (listing line 727) is
// missing from this extract; the cross-reference index gives it as
//   ur_result_t enqueueImpCommandBufferKernel(
// followed by the parameters below. OutSyncPoint and OutCommand are pointer
// out-parameters; whether they may be null is not visible here.
728  context Ctx, DeviceImplPtr DeviceImpl,
729  ur_exp_command_buffer_handle_t CommandBuffer,
730  const CGExecKernel &CommandGroup,
731  std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints,
732  ur_exp_command_buffer_sync_point_t *OutSyncPoint,
733  ur_exp_command_buffer_command_handle_t *OutCommand,
734  const std::function<void *(Requirement *Req)> &getMemAllocationFunc);
735 
736 // Sets arguments for a given kernel and device based on the argument type.
737 // Refactored from SetKernelParamsAndLaunch to allow it to be used in the graphs
738 // extension.
// getMemAllocationFunc maps a Requirement to a raw allocation pointer (its
// signature is void *(Requirement *)). Arg is taken by non-const reference.
// NOTE(review): NextTrueIndex is presumably the argument's index after
// eliminated arguments are skipped — confirm against the definition.
739 void SetArgBasedOnType(
740  const detail::PluginPtr &Plugin, ur_kernel_handle_t Kernel,
741  const std::shared_ptr<device_image_impl> &DeviceImageImpl,
742  const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
743  const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex);
744 
// NOTE(review): the first line of this declaration (listing line 745) is
// missing from this extract; the cross-reference index gives it as
//   void applyFuncOnFilteredArgs(
// followed by the parameters below. The index also shows KernelArgMask to be
// std::vector<bool>. Func is invoked with an argument and an int index named
// NextTrueIndex; which arguments are filtered out is not visible here.
746  const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
747  std::function<void(detail::ArgDesc &Arg, int NextTrueIndex)> Func);
748 
750 
751 } // namespace detail
752 } // namespace _V1
753 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
Base class for memory allocation commands.
Definition: commands.hpp:443
const Requirement * getRequirement() const final
Definition: commands.hpp:454
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
Definition: commands.hpp:450
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
Definition: commands.hpp:488
void * getMemAllocation() const final
Definition: commands.hpp:495
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
Definition: commands.hpp:508
AllocaCommandBase * getParentAlloca()
Definition: commands.hpp:517
"Execute kernel" command group class.
Definition: cg.hpp:246
Base class for all types of command groups.
Definition: cg.hpp:160
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:109
bool isSuccessfullyEnqueued() const
Definition: commands.hpp:160
virtual ur_result_t enqueueImp()=0
Private interface. Derived classes should implement this method.
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
Definition: commands.cpp:752
void * MTraceEvent
The event for node_create and task_begin.
Definition: commands.hpp:338
CommandType MType
The type of the command.
Definition: commands.hpp:287
virtual bool producesPiEvent() const
Returns true iff the command produces a UR event on non-host devices.
Definition: commands.cpp:791
int32_t MStreamID
The stream under which the traces are emitted.
Definition: commands.hpp:342
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
Definition: commands.hpp:298
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
Definition: commands.cpp:849
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
Definition: commands.hpp:294
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
Definition: commands.cpp:952
std::string MCommandName
Buffer to build the command end-user understandable name.
Definition: commands.hpp:351
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
Definition: commands.hpp:376
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
Definition: commands.hpp:313
std::vector< ur_exp_command_buffer_sync_point_t > MSyncPointDeps
List of sync points for submissions to a command buffer.
Definition: commands.hpp:396
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
Definition: commands.cpp:860
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, ur_event_handle_t &Event)
Definition: commands.cpp:498
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
Definition: commands.hpp:309
void waitForPreparedHostEvents() const
Definition: commands.cpp:493
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
Definition: commands.hpp:362
std::vector< ur_event_handle_t > getUrEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:232
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
Definition: commands.hpp:289
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
Definition: commands.cpp:581
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
Definition: commands.hpp:359
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:739
Marks MMarks
Used for marking the node during graph traversal.
Definition: commands.hpp:322
ur_exp_command_buffer_handle_t getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
Definition: commands.hpp:388
std::vector< EventImplPtr > & MPreparedHostDepsEvents
Definition: commands.hpp:261
void emitEnqueuedEventSignal(const ur_event_handle_t UrEventAddr)
Creates a signal event with the enqueued kernel event handle.
Definition: commands.cpp:840
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Definition: commands.hpp:260
std::string MSubmissionFunctionName
Definition: commands.hpp:363
uint64_t MInstanceID
Instance ID tracked for the command.
Definition: commands.hpp:355
std::vector< DepDesc > MDeps
Contains the list of dependencies (edges).
Definition: commands.hpp:307
const char * getBlockReason() const
Definition: commands.cpp:994
std::vector< ur_event_handle_t > getUrEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
Definition: commands.cpp:257
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
Definition: commands.cpp:795
std::vector< EventImplPtr > MBlockedUsers
Contains the list of commands that depend on the host command explicitly (by depends_on).
Definition: commands.hpp:383
void addUser(Command *NewUser)
Definition: commands.hpp:143
virtual ContextImplPtr getWorkerContext() const
Get the context of the queue this command will be submitted to.
Definition: commands.cpp:785
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
Definition: commands.hpp:330
const EventImplPtr & getEvent() const
Definition: commands.hpp:182
ur_exp_command_buffer_handle_t MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
Definition: commands.hpp:394
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
Definition: commands.cpp:706
CommandType getType() const
Definition: commands.hpp:146
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
Definition: commands.hpp:166
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
Definition: commands.hpp:345
std::string MAddressString
Buffer to build the address string.
Definition: commands.hpp:347
void addBlockedUserUnique(const EventImplPtr &NewUser)
Definition: commands.hpp:173
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
Definition: commands.hpp:311
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
Definition: commands.cpp:793
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
Definition: commands.hpp:353
std::string MCommandNodeType
Buffer to build the command node type.
Definition: commands.hpp:349
void emitEdgeEventForEventDependence(Command *Cmd, ur_event_handle_t &EventAddr)
Creates an edge event when the dependency is an event.
Definition: commands.cpp:648
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
Definition: commands.cpp:800
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
Definition: commands.cpp:597
const QueueImplPtr & getQueue() const
Definition: commands.hpp:180
Command(CommandType Type, QueueImplPtr Queue, ur_exp_command_buffer_handle_t CommandBuffer=nullptr, const std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
Definition: commands.cpp:555
virtual const Requirement * getRequirement() const
Definition: commands.hpp:217
The empty command does nothing during enqueue.
Definition: commands.hpp:401
const Requirement * getRequirement() const final
Definition: commands.hpp:406
bool producesPiEvent() const final
Returns true iff the command produces a UR event on non-host devices.
Definition: commands.cpp:1799
void printDot(std::ostream &Stream) const final
Definition: commands.cpp:1781
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
Definition: commands.cpp:1740
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
Definition: commands.cpp:1756
The exec CG command enqueues execution of kernel or explicit memory operation.
Definition: commands.hpp:631
detail::CG & getCG() const
Definition: commands.hpp:646
The map command enqueues mapping of device memory onto host memory.
Definition: commands.hpp:527
const Requirement * getRequirement() const final
Definition: commands.hpp:533
The mem copy host command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:590
const Requirement * getRequirement() const final
Definition: commands.hpp:597
The mem copy command enqueues memory copy between two instances of memory object.
Definition: commands.hpp:566
const Requirement * getRequirement() const final
Definition: commands.hpp:573
The release command enqueues release of a memory object instance allocated on Host or underlying framework.
Definition: commands.hpp:425
The unmap command removes mapping of host memory onto device memory.
Definition: commands.hpp:546
const Requirement * getRequirement() const final
Definition: commands.hpp:552
const Requirement * getRequirement() const final
Definition: commands.hpp:695
The class is an impl counterpart of the sycl::kernel_bundle.
Class representing the implementation of command_graph<executable>.
RTDeviceBinaryImage * retrieveAMDGCNOrNVPTXKernelBinary(const DeviceImplPtr DeviceImpl, const std::string &KernelName)
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
Definition: commands.cpp:2244
void enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage)
Definition: commands.cpp:2552
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
void SetArgBasedOnType(const PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex)
Definition: commands.cpp:2264
std::shared_ptr< detail::stream_impl > StreamImplPtr
Definition: commands.hpp:52
std::shared_ptr< event_impl > EventImplPtr
Definition: handler.hpp:183
std::shared_ptr< plugin > PluginPtr
Definition: ur.hpp:107
ur_result_t enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
Definition: commands.cpp:2448
std::shared_ptr< device_impl > DeviceImplPtr
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
Definition: commands.cpp:123
ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
Definition: commands.cpp:2669
auto tie(Ts &...Args)
Definition: tuple.hpp:39
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: helpers.hpp:45
Definition: access.hpp:18
bool MVisited
Used for marking the node as visited during graph traversal.
Definition: commands.hpp:317
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Definition: commands.hpp:319
Dependency between two commands.
Definition: commands.hpp:83
const Requirement * MDepRequirement
Requirement for the dependency.
Definition: commands.hpp:96
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Definition: commands.hpp:88
Command * MDepCommand
The actual dependency command.
Definition: commands.hpp:94
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
Definition: commands.hpp:99
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Definition: commands.hpp:84
Result of command enqueueing.
Definition: commands.hpp:64
ResultT MResult
Indicates the result of enqueueing.
Definition: commands.hpp:75
ur_result_t MErrCode
Error code which is set when enqueueing fails.
Definition: commands.hpp:79
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, ur_result_t ErrCode=UR_RESULT_SUCCESS)
Definition: commands.hpp:71
Command * MCmd
Pointer to the command which failed to enqueue.
Definition: commands.hpp:77