DPC++ Runtime
Runtime libraries for oneAPI DPC++
|
|
Go to the documentation of this file.
17 #include <unordered_set>
27 inline namespace _V1 {
30 #ifdef XPTI_ENABLE_INSTRUMENTATION
31 bool CurrentCodeLocationValid();
32 void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID,
33 xpti_td *TraceEvent, uint16_t Type,
40 class DispatchHostTask;
42 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
43 using EventImplPtr = std::shared_ptr<detail::event_impl>;
121 const std::vector<sycl::detail::pi::PiExtSyncPoint> &SyncPoints = {});
127 std::vector<Command *> &ToCleanUp);
133 std::vector<Command *> &ToCleanUp);
148 std::vector<Command *> &ToCleanUp);
187 Command *Cmd,
void *ObjAddr,
bool IsCommand,
188 std::optional<access::mode> AccMode = std::nullopt);
207 virtual void printDot(std::ostream &Stream)
const = 0;
210 assert(
false &&
"Internal Error. The command has no stored requirement");
237 std::vector<sycl::detail::pi::PiEvent>
238 getPiEvents(
const std::vector<EventImplPtr> &EventImpls)
const;
242 std::vector<sycl::detail::pi::PiEvent>
275 std::vector<Command *> &ToCleanUp);
412 void printDot(std::ostream &Stream)
const final;
436 void printDot(std::ostream &Stream)
const final;
459 virtual void *getMemAllocation()
const = 0;
471 void *MMemAllocation =
nullptr;
480 bool MIsActive = true;
484 bool MIsLeaderAlloca = true;
486 bool MIsConst = false;
498 bool InitFromUserData =
true,
500 bool IsConst =
false);
503 void printDot(std::ostream &Stream)
const final;
511 bool MInitFromUserData = false;
519 std::vector<Command *> &ToEnqueue,
520 std::vector<Command *> &ToCleanUp);
522 void *getMemAllocation()
const final;
523 void printDot(std::ostream &Stream)
const final;
539 void printDot(std::ostream &Stream)
const final;
548 void **MDstPtr =
nullptr;
549 access::
mode MMapMode;
558 void printDot(std::ostream &Stream)
const final;
568 void **MSrcPtr =
nullptr;
579 void printDot(std::ostream &Stream)
const final;
603 void printDot(std::ostream &Stream)
const final;
615 void **MDstPtr =
nullptr;
620 bool blocking,
void *ptr,
size_t size,
628 const
std::
string &KernelName,
631 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
641 std::unique_ptr<detail::CG> CommandGroup,
QueueImplPtr Queue,
643 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
645 std::vector<std::shared_ptr<const void>> getAuxiliaryResources()
const;
647 void clearAuxiliaryResources();
649 void printDot(std::ostream &Stream)
const final;
678 std::unique_ptr<detail::
CG> MCommandGroup;
686 #ifdef XPTI_ENABLE_INSTRUMENTATION
687 std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
688 int32_t StreamID,
const std::shared_ptr<detail::kernel_impl> &SyclKernel,
692 std::vector<ArgDesc> &CGArgs);
700 void printDot(std::ostream &Stream)
const final;
709 void **MDstPtr =
nullptr;
720 void printDot(std::ostream &Stream)
const final;
733 void setFusionStatus(FusionStatus Status);
740 bool isActive()
const {
return MStatus == FusionStatus::ACTIVE; }
751 FusionStatus MStatus;
761 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc);
769 const
std::function<
void *(
Requirement *Req)> &getMemAllocationFunc,
771 size_t NextTrueIndex);
775 std::function<
void(detail::
ArgDesc &Arg,
int NextTrueIndex)> Func);
void copySubmissionCodeLocation()
const Requirement * getRequirement() const final
The Command class represents some action that needs to be performed on one or more memory objects.
ResultT MResult
Indicates the result of enqueueing.
virtual bool readyForCleanup() const
Returns true iff this command is ready to be submitted for cleanup.
const Requirement * getRequirement() const final
void emitInstrumentationData() override
Instrumentation method which emits telemetry data.
virtual void emitInstrumentationData()=0
Instrumentation method which emits telemetry data.
bool MShouldCompleteEventIfPossible
bool isSuccessfullyEnqueued() const
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
std::vector< sycl::detail::pi::PiEvent > getPiEventsBlocking(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of an in-order queue.
virtual const Requirement * getRequirement() const
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
Marks MMarks
Used for marking the node during graph traversal.
std::string MSubmissionFunctionName
bool MTraceEventPrologComplete
Flag to indicate if makeTraceEventProlog() has been run.
::pi_kernel_cache_config PiKernelCacheConfig
void emitEnqueuedEventSignal(sycl::detail::pi::PiEvent &PiEventAddr)
Creates a signal event with the enqueued kernel event handle.
uint64_t MInstanceID
Instance ID tracked for the command.
std::shared_ptr< device_impl > DeviceImplPtr
const Requirement * getRequirement() const final
void emitEdgeEventForCommandDependence(Command *Cmd, void *ObjAddr, bool IsCommand, std::optional< access::mode > AccMode=std::nullopt)
Creates an edge event when the dependency is a command.
Result of command enqueueing.
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post enqueue cleanup.
void printDot(std::ostream &Stream) const final
The map command enqueues mapping of device memory onto host memory.
const QueueImplPtr & getQueue() const
void emitInstrumentation(uint16_t Type, const char *Txt=nullptr)
Emits an event of Type.
void makeTraceEventEpilog()
If prolog has been run, run epilog; this must be guarded by a check for xptiTraceEnabled().
bool MIsBlockable
Indicates whether the command can be blocked from enqueueing.
Command * processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform glueing of events from different contexts.
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req)
The empty command does nothing during enqueue.
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
virtual void printDot(std::ostream &Stream) const =0
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
std::vector< sycl::detail::pi::PiEvent > getPiEvents(const std::vector< EventImplPtr > &EventImpls) const
Collect PI events from EventImpls and filter out some of them in case of an in-order queue.
void addUser(Command *NewUser)
std::vector< sycl::detail::pi::PiExtSyncPoint > MSyncPointDeps
List of sync points for submissions to a command buffer.
detail::CG & getCG() const
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
const char * getBlockReason() const
bool producesPiEvent() const final
Returns true iff the command produces a PI event on non-host devices.
The AllocaSubBuf command enqueues creation of sub-buffer of memory object.
void emitInstrumentationDataProxy()
Proxy method which calls emitInstrumentationData.
std::vector< DepDesc > MDeps
Contains list of dependencies (edges).
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have requirement for.
bool MFirstInstance
Flag to indicate if this is the first time we are seeing this payload.
const Requirement * getRequirement() const final
const Requirement * getRequirement() const final
The release command enqueues release of a memory object instance allocated on Host or underlying fram...
DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd)
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
void emitEdgeEventForEventDependence(Command *Cmd, sycl::detail::pi::PiEvent &EventAddr)
Creates an edge event when the dependency is an event.
const Requirement * MDepRequirement
Requirement for the dependency.
Command * MDepCommand
The actual dependency command.
std::mutex MBlockedUsersMutex
Command * MCmd
Pointer to the command which failed to enqueue.
The alloca command enqueues allocation of instance of memory object on Host or underlying framework.
CommandType getType() const
void addBlockedUserUnique(const EventImplPtr &NewUser)
void waitForEvents(QueueImplPtr Queue, std::vector< EventImplPtr > &RawEvents, sycl::detail::pi::PiEvent &Event)
code_location MSubmissionCodeLocation
Represents code location of command submission to SYCL API, assigned with the valid value only if com...
std::vector< bool > KernelArgMask
std::string MCommandNodeType
Buffer to build the command node type.
Dependency between two commands.
std::mutex MEnqueueMtx
Mutex used to protect enqueueing from race conditions.
const EventImplPtr & getEvent() const
std::string MSubmissionFileName
Introduces string to handle memory management since code_location struct works with raw char arrays.
::pi_ext_sync_point PiExtSyncPoint
The mem copy command enqueues memory copy between two instances of memory object.
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
std::shared_ptr< event_impl > EventImplPtr
void clearAllDependencies()
Clear all dependency events. This should only be used if a command is about to be deleted without bein...
"Execute kernel" command group class.
const std::vector< EventImplPtr > & getPreparedDepsEvents() const
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
void waitForPreparedHostEvents() const
The exec CG command enqueues execution of kernel or explicit memory operation.
std::shared_ptr< detail::stream_impl > StreamImplPtr
bool MVisited
Used for marking the node as visited during graph traversal.
const Requirement * getRequirement() const final
const Requirement * getRequirement() const final
void * MAddress
Reserved for storing the object address such as SPIR-V or memory object address.
AllocaCommandBase * getParentAlloca()
pi_int32 MErrCode
Error code which is set when enqueueing fails.
void * MTraceEvent
The event for node_create and task_begin.
std::string MCommandName
Buffer to build the command end-user understandable name.
pi_int32 enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const CGExecKernel &CommandGroup, std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
void * getMemAllocation() const final
QueueImplPtr MWorkerQueue
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
uint64_t makeTraceEventProlog(void *MAddress)
Create a trace event of node_create type; this must be guarded by a check for xptiTraceEnabled().
const std::vector< EventImplPtr > & getPreparedHostDepsEvents() const
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
EmptyCommand(QueueImplPtr Queue)
friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs)
::pi_ext_command_buffer PiExtCommandBuffer
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
sycl::detail::pi::PiExtCommandBuffer getCommandBuffer() const
Gets the command buffer (if any) associated with this command.
std::string MAddressString
Buffer to build the address string.
The class is an impl counterpart of the sycl::kernel_bundle.
SYCLMemObjI * getSYCLMemObj() const
std::vector< EventImplPtr > & MPreparedHostDepsEvents
Command(CommandType Type, QueueImplPtr Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints={})
It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents references to event_impl class mem...
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector< Command * > &ToCleanUp)
Checks if the command is enqueued, and calls enqueueImp.
std::vector< EventImplPtr > & MPreparedDepsEvents
Dependency events prepared for waiting by backend.
int32_t MStreamID
The stream under which the traces are emitted.
Base class for all types of command groups.
pi_int32 enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read)
bool readyForDeletion() const
Base class for memory allocation commands.
const QueueImplPtr & getWorkerQueue() const
Get the queue this command will be submitted to.
The unmap command removes mapping of host memory onto device memory.
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depend on the host command explicitly (by depends_on).
ReleaseCommand * getReleaseCmd()
CommandType MType
The type of the command.
The mem copy host command enqueues memory copy between two instances of memory object.
sycl::detail::pi::PiExtCommandBuffer MCommandBuffer
CommandBuffer which will be used to submit to instead of the queue, if set.
void SetArgBasedOnType(const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr< device_image_impl > &DeviceImageImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, size_t NextTrueIndex)
virtual bool producesPiEvent() const
Returns true iff the command produces a PI event on non-host devices.
EnqueueResultT(ResultT Result=SyclEnqueueSuccess, Command *Cmd=nullptr, pi_int32 ErrCode=PI_SUCCESS)
bool isEnqueueBlocked() const
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig)
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
virtual const ContextImplPtr & getWorkerContext() const
Get the context of the queue this command will be submitted to.
The context class represents a SYCL context on which kernel functions may be executed.
virtual pi_int32 enqueueImp()=0
Private interface. Derived classes should implement this method.