17 #include <unordered_set>
27 inline namespace _V1 {
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
37 xpti_td *TraceEvent, uint16_t Type,
42 const std::string &KernelName);
49 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
50 using EventImplPtr = std::shared_ptr<detail::event_impl>;
72 ur_result_t ErrCode = UR_RESULT_SUCCESS)
128 ur_exp_command_buffer_handle_t CommandBuffer =
nullptr,
129 const std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints = {});
135 std::vector<Command *> &ToCleanUp);
141 std::vector<Command *> &ToCleanUp);
156 std::vector<Command *> &ToCleanUp);
195 Command *Cmd,
void *ObjAddr,
bool IsCommand,
196 std::optional<access::mode> AccMode = std::nullopt);
199 ur_event_handle_t &EventAddr);
215 virtual void printDot(std::ostream &Stream)
const = 0;
218 assert(
false &&
"Internal Error. The command has no stored requirement");
241 std::vector<ur_event_handle_t>
242 getUrEvents(
const std::vector<EventImplPtr> &EventImpls)
const;
246 std::vector<ur_event_handle_t>
264 ur_event_handle_t &Event);
281 std::vector<Command *> &ToCleanUp);
405 void printDot(std::ostream &Stream)
const final;
415 ur_result_t enqueueImp() final;
429 void printDot(std::ostream &Stream)
const final;
436 ur_result_t enqueueImp()
final;
464 void *MMemAllocation =
nullptr;
473 bool MIsActive = true;
477 bool MIsLeaderAlloca = true;
479 bool MIsConst = false;
491 bool InitFromUserData =
true,
493 bool IsConst =
false);
496 void printDot(std::ostream &Stream)
const final;
500 ur_result_t enqueueImp() final;
504 bool MInitFromUserData = false;
512 std::vector<Command *> &ToEnqueue,
513 std::vector<Command *> &ToCleanUp);
515 void *getMemAllocation()
const final;
516 void printDot(std::ostream &Stream)
const final;
521 ur_result_t enqueueImp() final;
532 void printDot(std::ostream &Stream)
const final;
537 ur_result_t enqueueImp() final;
541 void **MDstPtr =
nullptr;
542 access::
mode MMapMode;
551 void printDot(std::ostream &Stream)
const final;
557 ur_result_t enqueueImp() final;
561 void **MSrcPtr =
nullptr;
572 void printDot(std::ostream &Stream)
const final;
579 ur_result_t enqueueImp() final;
596 void printDot(std::ostream &Stream)
const final;
602 ur_result_t enqueueImp() final;
608 void **MDstPtr =
nullptr;
612 const
std::
string &PipeName,
bool blocking,
613 void *ptr,
size_t size,
614 std::vector<ur_event_handle_t> &RawEvents,
622 const
std::
string &KernelName,
std::vector<ur_event_handle_t> &RawEvents,
624 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
625 ur_kernel_cache_config_t KernelCacheConfig,
bool KernelIsCooperative,
626 const
bool KernelUsesClusterLaunch,
634 std::unique_ptr<detail::CG> CommandGroup,
QueueImplPtr Queue,
635 bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer =
nullptr,
636 const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});
638 std::vector<std::shared_ptr<const void>> getAuxiliaryResources()
const;
640 void clearAuxiliaryResources();
642 void printDot(std::ostream &Stream)
const final;
644 std::string_view getTypeString()
const;
657 bool MEventNeeded =
true;
666 ur_result_t enqueueImp() final;
667 ur_result_t enqueueImpCommandBuffer();
668 ur_result_t enqueueImpQueue();
672 std::unique_ptr<detail::
CG> MCommandGroup;
680 #ifdef XPTI_ENABLE_INSTRUMENTATION
681 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
682 int32_t StreamID,
const std::shared_ptr<detail::kernel_impl> &SyclKernel,
686 std::vector<ArgDesc> &CGArgs);
694 void printDot(std::ostream &Stream)
const final;
699 ur_result_t enqueueImp() final;
703 void **MDstPtr =
nullptr;
711 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
714 void printDot(std::ostream &Stream)
const final;
719 ur_result_t enqueueImp()
final;
722 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
729 ur_exp_command_buffer_handle_t CommandBuffer,
731 std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints,
732 ur_exp_command_buffer_sync_point_t *OutSyncPoint,
733 ur_exp_command_buffer_command_handle_t *OutCommand,
734 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc);
741 const std::shared_ptr<device_image_impl> &DeviceImageImpl,
742 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
746 const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
The context class represents a SYCL context on which kernel functions may be executed.
Base class for memory allocation commands.
ReleaseCommand * getReleaseCmd()
const Requirement * getRequirement() const final
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
void * getMemAllocation() const final
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
AllocaCommandBase * getParentAlloca()
"Execute kernel" command group class.
Base class for all types of command groups.
The Command class represents some action that needs to be performed on one or more memory objects.
bool MShouldCompleteEventIfPossible
void copySubmissionCodeLocation()
bool isSuccessfullyEnqueued() const
virtual ur_result_t enqueueImp()=0
Private interface. Derived classes should implement this method.
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
void * MTraceEvent
The event for node_create and task_begin.
CommandType MType
The type of the command.
virtual bool producesPiEvent() const
Returns true iff the command produces a UR event on non-host devices.
int32_t MStreamID
The stream under which the traces are emitted.
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
std::string MCommandName
Buffer to build the command end-user understandable name.
std::mutex MBlockedUsersMutex
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
std::vector< ur_exp_command_buffer_sync_point_t > MSyncPointDeps
List of sync points for submissions to a command buffer.
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, ur_event_handle_t &Event)
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
void waitForPreparedHostEvents() const
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
std::vector< ur_event_handle_t > getUrEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
Marks MMarks
Used for marking the node during graph traversal.
ur_exp_command_buffer_handle_t getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
std::vector< EventImplPtr > & MPreparedHostDepsEvents
void emitEnqueuedEventSignal(const ur_event_handle_t UrEventAddr)
Creates a signal event with the enqueued kernel event handle.
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
std::string MSubmissionFunctionName
uint64_t MInstanceID
Instance ID tracked for the command.
std::vector< DepDesc > MDeps
Contains list of dependencies(edges)
const char * getBlockReason() const
std::vector< ur_event_handle_t > getUrEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depends on the host command explicitly (by depends_on).
void addUser(Command *NewUser)
virtual ContextImplPtr getWorkerContext() const
Get the context of the queue this command will be submitted to.
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
const EventImplPtr & getEvent() const
ur_exp_command_buffer_handle_t MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
CommandType getType() const
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
std::string MAddressString
Buffer to build the address string.
QueueImplPtr MWorkerQueue
void addBlockedUserUnique(const EventImplPtr &NewUser)
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
std::string MCommandNodeType
Buffer to build the command node type.
void emitEdgeEventForEventDependence(Command *Cmd, ur_event_handle_t &EventAddr)
Creates an edge event when the dependency is an event.
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
const QueueImplPtr & getQueue() const
Command(CommandType Type, QueueImplPtr Queue, ur_exp_command_buffer_handle_t CommandBuffer=nullptr, const std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
virtual const Requirement * getRequirement() const
The empty command does nothing during enqueue.
const Requirement * getRequirement() const final
bool producesPiEvent() const final
Returns true iff the command produces a UR event on non-host devices.
void printDot(std::ostream &Stream) const final
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
The exec CG command enqueues execution of kernel or explicit memory operation.
detail::CG & getCG() const
The map command enqueues mapping of device memory onto host memory.
const Requirement * getRequirement() const final
The mem copy host command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The mem copy command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
The unmap command removes mapping of host memory onto device memory.
const Requirement * getRequirement() const final
const Requirement * getRequirement() const final
The class is an impl counterpart of the sycl::kernel_bundle.
Class representing the implementation of command_graph<executable>.
RTDeviceBinaryImage * retrieveAMDGCNOrNVPTXKernelBinary(const DeviceImplPtr DeviceImpl, const std::string &KernelName)
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
void enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
void SetArgBasedOnType(const PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex)
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
ur_result_t enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
std::shared_ptr< device_impl > DeviceImplPtr
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
bool MVisited
Used for marking the node as visited during graph traversal.
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Dependency between two commands.
const Requirement * MDepRequirement
Requirement for the dependency.
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Command * MDepCommand
The actual dependency command.
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Result of command enqueueing.
ResultT MResult
Indicates the result of enqueueing.
ur_result_t MErrCode
Error code which is set when enqueueing fails.
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, ur_result_t ErrCode=UR_RESULT_SUCCESS)
Command * MCmd
Pointer to the command which failed to enqueue.