17 #include <unordered_set>
27 inline namespace _V1 {
29 namespace ext::oneapi::experimental::detail {
30 class exec_graph_impl;
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 bool CurrentCodeLocationValid();
37 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
38 xpti_td *TraceEvent, uint16_t Type,
45 class DispatchHostTask;
47 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
48 using EventImplPtr = std::shared_ptr<detail::event_impl>;
127 const std::vector<sycl::detail::pi::PiExtSyncPoint> &SyncPoints = {});
133 std::vector<Command *> &ToCleanUp);
139 std::vector<Command *> &ToCleanUp);
154 std::vector<Command *> &ToCleanUp);
193 Command *Cmd,
void *ObjAddr,
bool IsCommand,
194 std::optional<access::mode> AccMode = std::nullopt);
213 virtual void printDot(std::ostream &Stream)
const = 0;
216 assert(
false &&
"Internal Error. The command has no stored requirement");
243 std::vector<sycl::detail::pi::PiEvent>
244 getPiEvents(
const std::vector<EventImplPtr> &EventImpls)
const;
248 std::vector<sycl::detail::pi::PiEvent>
283 std::vector<Command *> &ToCleanUp);
420 void printDot(std::ostream &Stream)
const final;
444 void printDot(std::ostream &Stream)
const final;
479 void *MMemAllocation =
nullptr;
488 bool MIsActive = true;
492 bool MIsLeaderAlloca = true;
494 bool MIsConst = false;
506 bool InitFromUserData =
true,
508 bool IsConst =
false);
511 void printDot(std::ostream &Stream)
const final;
519 bool MInitFromUserData = false;
527 std::vector<Command *> &ToEnqueue,
528 std::vector<Command *> &ToCleanUp);
530 void *getMemAllocation()
const final;
531 void printDot(std::ostream &Stream)
const final;
547 void printDot(std::ostream &Stream)
const final;
556 void **MDstPtr =
nullptr;
557 access::
mode MMapMode;
566 void printDot(std::ostream &Stream)
const final;
576 void **MSrcPtr =
nullptr;
587 void printDot(std::ostream &Stream)
const final;
611 void printDot(std::ostream &Stream)
const final;
623 void **MDstPtr =
nullptr;
628 bool blocking,
void *ptr,
size_t size,
636 const
std::
string &KernelName,
639 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
641 bool KernelIsCooperative);
650 std::unique_ptr<detail::CG> CommandGroup,
QueueImplPtr Queue,
652 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
654 std::vector<std::shared_ptr<const void>> getAuxiliaryResources()
const;
656 void clearAuxiliaryResources();
658 void printDot(std::ostream &Stream)
const final;
660 std::string_view getTypeString()
const;
688 std::unique_ptr<detail::
CG> MCommandGroup;
696 #ifdef XPTI_ENABLE_INSTRUMENTATION
697 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
698 int32_t StreamID,
const std::shared_ptr<detail::kernel_impl> &SyclKernel,
702 std::vector<ArgDesc> &CGArgs);
710 void printDot(std::ostream &Stream)
const final;
719 void **MDstPtr =
nullptr;
730 void printDot(std::ostream &Stream)
const final;
743 void setFusionStatus(FusionStatus Status);
// Reports whether the stored fusion status (MStatus) is currently
// FusionStatus::ACTIVE. Const accessor: it only compares, never modifies.
750 bool isActive()
const {
return MStatus == FusionStatus::ACTIVE; }
761 FusionStatus MStatus;
769 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
772 void printDot(std::ostream &Stream)
const final;
780 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
789 std::vector<sycl::detail::pi::PiExtSyncPoint> &SyncPoints,
792 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc);
799 const std::shared_ptr<device_image_impl> &DeviceImageImpl,
800 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
802 size_t NextTrueIndex);
805 const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
The context class represents a SYCL context on which kernel functions may be executed.
Base class for memory allocation commands.
ReleaseCommand * getReleaseCmd()
const Requirement * getRequirement() const final
virtual void * getMemAllocation() const =0
SYCLMemObjI * getSYCLMemObj() const
The alloca command enqueues allocation of an instance of a memory object on the host or the underlying framework.
void * getMemAllocation() const final
The AllocaSubBuf command enqueues creation of a sub-buffer of a memory object.
AllocaCommandBase * getParentAlloca()
"Execute kernel" command group class.
Base class for all types of command groups.
The Command class represents some action that needs to be performed on one or more memory objects.
bool MShouldCompleteEventIfPossible
void copySubmissionCodeLocation()
bool isSuccessfullyEnqueued() const
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
void * MTraceEvent
The event for node_create and task_begin.
CommandType MType
The type of the command.
virtual bool producesPiEvent() const
Returns true iff the command produces a PI event on non-host devices.
void emitEnqueuedEventSignal(sycl::detail::pi::PiEvent &PiEventAddr)
Creates a signal event with the enqueued kernel event handle.
int32_t MStreamID
The stream under which the traces are emitted.
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
std::string MCommandName
Buffer to build the command end-user understandable name.
sycl::detail::pi::PiExtCommandBuffer getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
virtual pi_int32 enqueueImp()=0
Private interface. Derived classes should implement this method.
std::mutex MBlockedUsersMutex
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
void emitEdgeEventForEventDependence(Command *Cmd, sycl::detail::pi::PiEvent &EventAddr)
Creates an edge event when the dependency is an event.
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
std::vector< sycl::detail::pi::PiEvent > getPiEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of in order queue.
void waitForPreparedHostEvents() const
std::string MSubmissionFileName
Introduces a string to handle memory management, since the code_location struct works with raw char arrays.
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
virtual const ContextImplPtr & getWorkerContext() const
Get the context of the queue this command will be submitted to.
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
std::vector< sycl::detail::pi::PiEvent > getPiEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of in order queue.
Marks MMarks
Used for marking the node during graph traversal.
std::vector< EventImplPtr > & MPreparedHostDepsEvents
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Command(CommandType Type, QueueImplPtr Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
std::string MSubmissionFunctionName
uint64_t MInstanceID
Instance ID tracked for the command.
const QueueImplPtr & getWorkerQueue() const
Get the queue this command will be submitted to.
std::vector< DepDesc > MDeps
Contains the list of dependencies (edges).
const char * getBlockReason() const
std::vector< sycl::detail::pi::PiExtSyncPoint > MSyncPointDeps
List of sync points for submissions to a command buffer.
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
std::vector< EventImplPtr > MBlockedUsers
Contains the list of commands that depend on the host command explicitly (by depends_on).
void addUser(Command *NewUser)
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
void clearAllDependencies()
Clear all dependency events. This should only be used if a command is about to be deleted without bein...
const EventImplPtr & getEvent() const
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
CommandType getType() const
virtual void printDot(std::ostream &Stream) const =0
bool isEnqueueBlocked() const
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
std::string MAddressString
Buffer to build the address string.
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, sycl::detail::pi::PiEvent &Event)
QueueImplPtr MWorkerQueue
void addBlockedUserUnique(const EventImplPtr &NewUser)
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
sycl::detail::pi::PiExtCommandBuffer MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
std::string MCommandNodeType
Buffer to build the command node type.
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
const QueueImplPtr & getQueue() const
virtual const Requirement * getRequirement() const
bool MFirstInstance
Flag to indicate if this is the first time we are seeing this payload.
The empty command does nothing during enqueue.
const Requirement * getRequirement() const final
bool producesPiEvent() const final
Returns true iff the command produces a PI event on non-host devices.
void printDot(std::ostream &Stream) const final
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
EmptyCommand(QueueImplPtr Queue)
The exec CG command enqueues execution of kernel or explicit memory operation.
detail::CG & getCG() const
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
bool readyForDeletion() const
The map command enqueues mapping of device memory onto host memory.
const Requirement * getRequirement() const final
The mem copy host command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The mem copy command enqueues memory copy between two instances of memory object.
const Requirement * getRequirement() const final
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
The unmap command removes mapping of host memory onto device memory.
const Requirement * getRequirement() const final
const Requirement * getRequirement() const final
The class is an impl counterpart of the sycl::kernel_bundle.
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Class representing the implementation of command_graph<executable>.
::pi_kernel_cache_config PiKernelCacheConfig
::pi_ext_sync_point PiExtSyncPoint
::pi_ext_command_buffer_command PiExtCommandBufferCommand
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
pi_int32 enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative)
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
void SetArgBasedOnType(const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, size_t NextTrueIndex)
pi_int32 enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const CGExecKernel &CommandGroup, std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, sycl::detail::pi::PiExtCommandBufferCommand *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
bool MVisited
Used for marking the node as visited during graph traversal.
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Dependency between two commands.
const Requirement * MDepRequirement
Requirement for the dependency.
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
Command * MDepCommand
The actual dependency command.
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
Result of command enqueueing.
ResultT MResult
Indicates the result of enqueueing.
pi_int32 MErrCode
Error code which is set when enqueueing fails.
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, pi_int32 ErrCode=PI_SUCCESS)
Command * MCmd
Pointer to the command which failed to enqueue.