26 inline namespace _V1 {
47 #ifdef XPTI_ENABLE_INSTRUMENTATION
49 std::set<Command *> DepCommands;
51 std::vector<Command *> ToCleanUp;
57 throw runtime_error(
"Enqueue process failed.",
58 PI_ERROR_INVALID_OPERATION);
59 #ifdef XPTI_ENABLE_INSTRUMENTATION
61 DepCommands.insert(Cmd);
70 throw runtime_error(
"Enqueue process failed.",
71 PI_ERROR_INVALID_OPERATION);
72 #ifdef XPTI_ENABLE_INSTRUMENTATION
73 DepCommands.insert(Cmd);
81 Res, ToCleanUp, ReleaseCmd);
83 throw runtime_error(
"Enqueue process failed.",
84 PI_ERROR_INVALID_OPERATION);
85 #ifdef XPTI_ENABLE_INSTRUMENTATION
96 std::unique_ptr<detail::CG> CommandGroup,
const QueueImplPtr &Queue,
98 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies) {
100 const CG::CGTYPE Type = CommandGroup->getType();
101 std::vector<Command *> AuxiliaryCmds;
102 std::vector<StreamImplPtr> Streams;
105 auto *CGExecKernelPtr =
static_cast<CGExecKernel *
>(CommandGroup.get());
107 CGExecKernelPtr->clearStreams();
111 if (Queue->is_host()) {
113 Stream->initStreamHost(Queue);
117 std::vector<std::shared_ptr<const void>> AuxiliaryResources;
118 AuxiliaryResources = CommandGroup->getAuxiliaryResources();
119 CommandGroup->clearAuxiliaryResources();
121 bool ShouldEnqueue =
true;
135 NewCmd = Result.NewCmd;
136 NewEvent = Result.NewEvent;
137 ShouldEnqueue = Result.ShouldEnqueue;
142 std::move(Queue), AuxiliaryCmds,
143 CommandBuffer, std::move(Dependencies));
145 NewCmd = Result.NewCmd;
146 NewEvent = Result.NewEvent;
147 ShouldEnqueue = Result.ShouldEnqueue;
149 NewEvent->setSubmissionTime();
160 if (!AuxiliaryResources.empty())
167 std::vector<Command *> &AuxiliaryCmds,
169 std::vector<Command *> ToCleanUp;
174 (NewEvent) ?
static_cast<Command *
>(NewEvent->getCommand()) :
nullptr;
179 auto CleanUp = [&]() {
180 if (NewCmd && (NewCmd->
MDeps.size() == 0 && NewCmd->
MUsers.size() == 0)) {
182 NewEvent->setCommand(
nullptr);
188 for (
Command *Cmd : AuxiliaryCmds) {
193 throw runtime_error(
"Auxiliary enqueue process failed.",
194 PI_ERROR_INVALID_OPERATION);
199 std::rethrow_exception(std::current_exception());
208 NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking);
210 throw runtime_error(
"Enqueue process failed.",
211 PI_ERROR_INVALID_OPERATION);
216 std::rethrow_exception(std::current_exception());
224 std::vector<Command *> AuxiliaryCmds;
235 std::vector<Command *> ToCleanUp;
241 for (
Command *Cmd : AuxiliaryCmds) {
244 throw runtime_error(
"Enqueue process failed.",
245 PI_ERROR_INVALID_OPERATION);
251 throw runtime_error(
"Enqueue process failed.",
252 PI_ERROR_INVALID_OPERATION);
254 NewCmd->
getQueue()->reportAsyncException(std::current_exception());
273 std::vector<Command *> ToCleanUp;
291 if (!Lock.owns_lock())
298 if (!Lock.owns_lock())
308 std::vector<Command *> AuxiliaryCmds;
320 std::vector<Command *> ToCleanUp;
326 for (
Command *Cmd : AuxiliaryCmds) {
329 throw runtime_error(
"Enqueue process failed.",
330 PI_ERROR_INVALID_OPERATION);
333 if (
Command *NewCmd =
static_cast<Command *
>(NewCmdEvent->getCommand())) {
337 throw runtime_error(
"Enqueue process failed.",
338 PI_ERROR_INVALID_OPERATION);
349 std::vector<Command *> ToCleanUp;
353 assert(BlockedCmd &&
"Can't find appropriate command to unblock");
364 std::vector<Command *> &ToCleanUp) {
366 auto EnqueueLeaves = [&ToCleanUp, &GraphReadLock](
LeavesCollection &Leaves) {
372 throw runtime_error(
"Enqueue process failed.",
373 PI_ERROR_INVALID_OPERATION);
382 const std::vector<EventImplPtr> &ToEnqueue,
ReadLockT &GraphReadLock,
383 std::vector<Command *> &ToCleanUp) {
384 for (
auto &Event : ToEnqueue) {
392 throw runtime_error(
"Enqueue process failed.",
393 PI_ERROR_INVALID_OPERATION);
404 {sycl::property::queue::enable_profiling()}));
445 if (Lock.owns_lock()) {
449 std::vector<Command *> DeferredCleanupCommands;
454 for (
Command *Cmd : DeferredCleanupCommands) {
464 std::copy_if(Cmds.begin(), Cmds.end(),
467 return Cmd->getType() != Command::CommandType::FUSION;
480 std::vector<Command *> ToCleanUp;
484 std::vector<DepDesc> Deps = Cmd->
MDeps;
487 ToCleanUp.push_back(Cmd);
518 std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage;
527 std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
531 if (Lock.owns_lock()) {
542 ObjsReadyToRelease.push_back(*MemObjIt);
547 auto ReleaseCandidateIt = ObjsReadyToRelease.begin();
548 while (ReleaseCandidateIt != ObjsReadyToRelease.end()) {
551 ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
553 if (!ObjsReadyToRelease.empty()) {
557 std::make_move_iterator(ObjsReadyToRelease.begin()),
558 std::make_move_iterator(ObjsReadyToRelease.end()));
563 std::unordered_map<
EventImplPtr, std::vector<std::shared_ptr<const void>>>
566 std::vector<std::shared_ptr<const void>> &&Resources) {
567 std::vector<std::shared_ptr<const void>> &StoredResources =
568 AuxiliaryResources[Event];
569 StoredResources.insert(StoredResources.end(),
570 std::make_move_iterator(Resources.begin()),
571 std::make_move_iterator(Resources.end()));
582 std::move(Iter->second));
587 EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources) {
590 std::move(Resources));
599 Event->waitInternal();
601 }
else if (Event->isCompleted())
622 std::vector<Command *> ToEnqueue;
633 std::vector<Command *> ToEnqueue;
650 void Scheduler::printFusionWarning(
const std::string &Message) {
652 std::cerr <<
"WARNING: " << Message <<
"\n";
656 KernelFusionCommand *Scheduler::isPartOfActiveFusion(Command *Cmd) {
657 auto CmdType = Cmd->getType();
660 auto *FusionCmd =
static_cast<KernelFusionCommand *
>(Cmd);
661 return (FusionCmd->isActive()) ? FusionCmd :
nullptr;
664 auto *CGCmd =
static_cast<ExecCGCommand *
>(Cmd);
665 return (CGCmd->MFusionCmd && CGCmd->MFusionCmd->isActive())
676 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
678 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
679 std::vector<detail::EventImplPtr> &Events) {
680 std::vector<Command *> AuxiliaryCmds;
687 Graph, Nodes, Queue, Requirements, Events, AuxiliaryCmds);
693 std::vector<Command *> ToCleanUp;
699 for (
Command *Cmd : AuxiliaryCmds) {
702 throw runtime_error(
"Enqueue process failed.",
703 PI_ERROR_INVALID_OPERATION);
706 if (
Command *NewCmd =
static_cast<Command *
>(NewCmdEvent->getCommand())) {
710 throw runtime_error(
"Enqueue process failed.",
711 PI_ERROR_INVALID_OPERATION);
725 if ((!SyclEventImplPtr->isContextInitialized() &&
726 !SyclEventImplPtr->is_host()) ||
727 SyclEventImplPtr->isNOP()) {
730 if (SyclEventImplPtr->is_host()) {
731 return SyclEventImplPtr->isCompleted();
734 if (SyclEventImplPtr->getContextImpl() != Context)
739 return SyclEventImplPtr->getHandleRef() !=
nullptr;
743 const std::vector<sycl::event> &DepEvents,
ContextImplPtr Context) {
746 DepEvents.begin(), DepEvents.end(), [&Context](
const sycl::event &Event) {
747 const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
748 return CheckEventReadiness(Context, SyclEventImplPtr);
753 const std::vector<EventImplPtr> &DepEvents,
ContextImplPtr Context) {
755 return std::all_of(DepEvents.begin(), DepEvents.end(),
757 return CheckEventReadiness(Context, SyclEventImplPtr);
The context class represents a SYCL context on which kernel functions may be executed.
detail::SYCLMemObjI * MSYCLMemObj
Base class for memory allocation commands.
ReleaseCommand * getReleaseCmd()
"Execute kernel" command group class.
std::vector< std::shared_ptr< detail::stream_impl > > getStreams() const
CGTYPE
Type of the command group.
The Command class represents some action that needs to be performed on one or more memory objects.
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
std::mutex MBlockedUsersMutex
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
std::vector< DepDesc > MDeps
Contains the list of dependencies (edges).
std::vector< EventImplPtr > MBlockedUsers
Contains the list of commands that depend on the host command explicitly (by depends_on).
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
const EventImplPtr & getEvent() const
CommandType getType() const
const QueueImplPtr & getQueue() const
bool isSchedulerAlive() const
Scheduler & getScheduler()
static GlobalHandler & instance()
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
std::shared_ptr< MemObjRecord > MRecord
Command * addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events, std::vector< Command * > &ToEnqueue)
Adds a command buffer update operation to the execution graph.
void startFusion(QueueImplPtr Queue)
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers command group and adds it to the dependency graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
bool isInFusionMode(QueueIdT queue)
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &HostQueue, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true, bool *Success=nullptr)
Waits until the command associated with the passed Event is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
DPC++ graph scheduler class.
void waitForEvent(const EventImplPtr &Event, bool *Success=nullptr)
Waits for the event.
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group that copies the most recent memory to the memory pointed to by the requirement.
bool isDeferredMemObjectsEmpty()
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph that update the requirement with the pointer to the host memory.
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
void cleanupAuxiliaryResources(BlockingT Blocking)
std::unique_lock< RWLockT > WriteLockT
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
EventImplPtr addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events)
Adds a command buffer update operation to the execution graph.
void cleanupDeferredMemObjects(BlockingT Blocking)
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
std::mutex MDeferredCleanupMutex
bool isInFusionMode(QueueIdT Queue)
GraphBuilder MGraphBuilder
void cancelFusion(QueueImplPtr Queue)
std::shared_lock< RWLockT > ReadLockT
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
void startFusion(QueueImplPtr Queue)
bool checkLeavesCompletion(MemObjRecord *Record)
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represent...
std::mutex MDeferredMemReleaseMutex
static Scheduler & getInstance()
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group and adds it to the dependency graph.
std::mutex MAuxiliaryResourcesMutex
static bool isInstanceAlive()
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
void cleanupCommands(const std::vector< Command * > &Cmds)
void NotifyHostTaskCompletion(Command *Cmd)
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
std::vector< Command * > MDeferredCleanupCommands
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
QueueImplPtr DefaultHostQueue
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
static std::shared_ptr< device_impl > getHostDeviceImpl()
Gets the single instance of the Host Device.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Class representing the implementation of command_graph<executable>.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
bool CheckEventReadiness(const ContextImplPtr &Context, const EventImplPtr &SyclEventImplPtr)
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
static void registerAuxiliaryResourcesNoLock(std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void >>> &AuxiliaryResources, const EventImplPtr &Event, std::vector< std::shared_ptr< const void >> &&Resources)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
Result of command enqueueing.
ResultT MResult
Indicates the result of enqueueing.
LeavesCollection MWriteLeaves
std::vector< AllocaCommandBase * > MAllocaCommands
LeavesCollection MReadLeaves