17 #include <unordered_set>
27 inline namespace _V1 {
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
37 xpti_td *TraceEvent, uint16_t Type,
42 const std::string &KernelName);
49 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
50 using EventImplPtr = std::shared_ptr<detail::event_impl>;
72 ur_result_t ErrCode = UR_RESULT_SUCCESS)
129 ur_exp_command_buffer_handle_t CommandBuffer =
nullptr,
130 const std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints = {});
136 std::vector<Command *> &ToCleanUp);
142 std::vector<Command *> &ToCleanUp);
157 std::vector<Command *> &ToCleanUp);
196 Command *Cmd,
void *ObjAddr,
bool IsCommand,
197 std::optional<access::mode> AccMode = std::nullopt);
200 ur_event_handle_t &EventAddr);
216 virtual void printDot(std::ostream &Stream)
const = 0;
219 assert(
false &&
"Internal Error. The command has no stored requirement");
242 std::vector<ur_event_handle_t>
243 getUrEvents(
const std::vector<EventImplPtr> &EventImpls)
const;
247 std::vector<ur_event_handle_t>
265 ur_event_handle_t &Event);
282 std::vector<Command *> &ToCleanUp);
419 void printDot(std::ostream &Stream)
const final;
429 ur_result_t enqueueImp() final;
443 void printDot(std::ostream &Stream)
const final;
450 ur_result_t enqueueImp()
final;
478 void *MMemAllocation =
nullptr;
487 bool MIsActive = true;
491 bool MIsLeaderAlloca = true;
493 bool MIsConst = false;
505 bool InitFromUserData =
true,
507 bool IsConst =
false);
510 void printDot(std::ostream &Stream)
const final;
514 ur_result_t enqueueImp() final;
518 bool MInitFromUserData = false;
526 std::vector<Command *> &ToEnqueue,
527 std::vector<Command *> &ToCleanUp);
529 void *getMemAllocation()
const final;
530 void printDot(std::ostream &Stream)
const final;
535 ur_result_t enqueueImp() final;
546 void printDot(std::ostream &Stream)
const final;
551 ur_result_t enqueueImp() final;
555 void **MDstPtr =
nullptr;
556 access::
mode MMapMode;
565 void printDot(std::ostream &Stream)
const final;
571 ur_result_t enqueueImp() final;
575 void **MSrcPtr =
nullptr;
586 void printDot(std::ostream &Stream)
const final;
593 ur_result_t enqueueImp() final;
610 void printDot(std::ostream &Stream)
const final;
616 ur_result_t enqueueImp() final;
622 void **MDstPtr =
nullptr;
626 const
std::
string &PipeName,
bool blocking,
627 void *ptr,
size_t size,
628 std::vector<ur_event_handle_t> &RawEvents,
636 const
std::
string &KernelName,
std::vector<ur_event_handle_t> &RawEvents,
638 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
639 ur_kernel_cache_config_t KernelCacheConfig,
bool KernelIsCooperative,
640 const
bool KernelUsesClusterLaunch,
650 std::unique_ptr<detail::CG> CommandGroup,
QueueImplPtr Queue,
651 bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer =
nullptr,
652 const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});
654 std::vector<std::shared_ptr<const void>> getAuxiliaryResources()
const;
656 void clearAuxiliaryResources();
658 void printDot(std::ostream &Stream)
const final;
660 std::string_view getTypeString()
const;
678 bool MEventNeeded =
true;
687 ur_result_t enqueueImp() final;
688 ur_result_t enqueueImpCommandBuffer();
689 ur_result_t enqueueImpQueue();
693 std::unique_ptr<detail::
CG> MCommandGroup;
701 #ifdef XPTI_ENABLE_INSTRUMENTATION
702 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
703 int32_t StreamID,
const std::shared_ptr<detail::kernel_impl> &SyclKernel,
707 std::vector<ArgDesc> &CGArgs);
715 void printDot(std::ostream &Stream)
const final;
720 ur_result_t enqueueImp() final;
724 void **MDstPtr =
nullptr;
735 void printDot(std::ostream &Stream)
const final;
748 void setFusionStatus(FusionStatus Status);
755 bool isActive()
const {
return MStatus == FusionStatus::ACTIVE; }
760 ur_result_t enqueueImp() final;
766 FusionStatus MStatus;
774 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
777 void printDot(std::ostream &Stream)
const final;
782 ur_result_t enqueueImp()
final;
785 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
792 ur_exp_command_buffer_handle_t CommandBuffer,
794 std::vector<ur_exp_command_buffer_sync_point_t> &SyncPoints,
795 ur_exp_command_buffer_sync_point_t *OutSyncPoint,
796 ur_exp_command_buffer_command_handle_t *OutCommand,
797 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc);
804 const std::shared_ptr<device_image_impl> &DeviceImageImpl,
805 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
809 const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
The context class represents a SYCL context on which kernel functions may be executed.
Base class for memory allocation commands.
ReleaseCommand * getReleaseCmd()
const Requirement * getRequirement() const final
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
void * getMemAllocation() const final
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
AllocaCommandBase * getParentAlloca()
"Execute kernel" command group class.
Base class for all types of command groups.
The Command class represents some action that needs to be performed on one or more memory objects.
bool MShouldCompleteEventIfPossible
void copySubmissionCodeLocation()
bool isSuccessfullyEnqueued() const
virtual ur_result_t enqueueImp()=0
Private interface. Derived classes should implement this method.
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
void * MTraceEvent
The event for node_create and task_begin.
CommandType MType
The type of the command.
virtual bool producesPiEvent() const
Returns true iff the command produces a UR event on non-host devices.
int32_t MStreamID
The stream under which the traces are emitted.
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
std::string MCommandName
Buffer to build the command end-user understandable name.
std::mutex MBlockedUsersMutex
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
std::vector< ur_exp_command_buffer_sync_point_t > MSyncPointDeps
List of sync points for submissions to a command buffer.
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, ur_event_handle_t &Event)
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
void waitForPreparedHostEvents() const
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
std::vector< ur_event_handle_t > getUrEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
void emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr)
Creates a signal event with the enqueued kernel event handle.
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
Marks MMarks
Used for marking the node during graph traversal.
ur_exp_command_buffer_handle_t getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
std::vector< EventImplPtr > & MPreparedHostDepsEvents
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
std::string MSubmissionFunctionName
uint64_t MInstanceID
Instance ID tracked for the command.
std::vector< DepDesc > MDeps
Contains list of dependencies(edges)
const char * getBlockReason() const
std::vector< ur_event_handle_t > getUrEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect UR events from EventImpls and filter out some of them in case of in order queue.
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depends on the host command explicitly (by depends_on).
void addUser(Command *NewUser)
virtual ContextImplPtr getWorkerContext() const
Get the context of the queue this command will be submitted to.
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
void clearAllDependencies()
Clear all dependency events This should only be used if a command is about to be deleted without bein...
const EventImplPtr & getEvent() const
ur_exp_command_buffer_handle_t MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
CommandType getType() const
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
std::string MAddressString
Buffer to build the address string.
QueueImplPtr MWorkerQueue
void addBlockedUserUnique(const EventImplPtr &NewUser)
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
std::string MCommandNodeType
Buffer to build the command node type.
void emitEdgeEventForEventDependence(Command *Cmd, ur_event_handle_t &EventAddr)
Creates an edge event when the dependency is an event.
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
const QueueImplPtr & getQueue() const
Command(CommandType Type, QueueImplPtr Queue, ur_exp_command_buffer_handle_t CommandBuffer=nullptr, const std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
virtual const Requirement * getRequirement() const
bool MFirstInstance
Flag to indicate if this is the first time we are seeing this payload.
The empty command does nothing during enqueue.
const Requirement * getRequirement() const final
bool producesPiEvent() const final
Returns true iff the command produces a UR event on non-host devices.
void printDot(std::ostream &Stream) const final
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
The exec CG command enqueues execution of kernel or explicit memory operation.
detail::CG & getCG() const
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
bool readyForDeletion() const
The map command enqueues mapping of device memory onto host memory.
const Requirement * getRequirement() const final
The mem copy host command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The mem copy command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
The unmap command removes mapping of host memory onto device memory.
const Requirement * getRequirement() const final
const Requirement * getRequirement() const final
The class is an impl counterpart of the sycl::kernel_bundle.
Class representing the implementation of command_graph<executable>.
RTDeviceBinaryImage * retrieveAMDGCNOrNVPTXKernelBinary(const DeviceImplPtr DeviceImpl, const std::string &KernelName)
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
void enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
void SetArgBasedOnType(const PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex)
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
ur_result_t enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector< ur_exp_command_buffer_sync_point_t > &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
std::shared_ptr< device_impl > DeviceImplPtr
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
bool MVisited
Used for marking the node as visited during graph traversal.
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Dependency between two commands.
const Requirement * MDepRequirement
Requirement for the dependency.
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Command * MDepCommand
The actual dependency command.
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Result of command enqueueing.
ResultT MResult
Indicates the result of enqueueing.
ur_result_t MErrCode
Error code which is set when enqueueing fails.
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, ur_result_t ErrCode=UR_RESULT_SUCCESS)
Command * MCmd
Pointer to the command which failed to enqueue.