20 #include <shared_mutex>
21 #include <unordered_map>
22 #include <unordered_set>
175 inline namespace _V1 {
176 namespace ext::oneapi::experimental::detail {
177 class exec_graph_impl;
184 class DispatchHostTask;
187 using EventImplPtr = std::shared_ptr<detail::event_impl>;
188 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
191 using QueueIdT = std::hash<std::shared_ptr<detail::queue_impl>>::result_type;
194 using FusionMap = std::unordered_map<QueueIdT, FusionList>;
205 :
MReadLeaves{this, LeafLimit, AllocateDependency},
383 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
477 std::vector<Command *> &AuxilaryCmds,
489 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
491 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
492 std::vector<detail::EventImplPtr> &Events);
504 while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
508 std::this_thread::yield();
523 while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
527 std::this_thread::yield();
551 std::vector<Command *> &ToCleanUp);
556 std::vector<Command *> &ToCleanUp);
572 EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources);
597 std::unique_ptr<detail::CG> CommandGroup,
const QueueImplPtr &Queue,
598 std::vector<Command *> &ToEnqueue,
600 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
608 std::vector<Command *> &ToEnqueue);
619 std::vector<Command *> &ToEnqueue);
646 std::vector<Command *> &ToEnqueue);
660 std::vector<Command *> &ToEnqueue);
665 std::vector<Command *> &ToCleanUp);
678 std::vector<Command *> &ToCleanUp);
689 std::vector<Command *> &ToEnqueue,
705 std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
707 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
708 std::vector<detail::EventImplPtr> &Events,
709 std::vector<Command *> &ToEnqueue);
725 std::vector<Command *> &ToEnqueue);
731 std::vector<Command *> &ToEnqueue);
736 std::vector<Command *> &ToEnqueue);
739 std::set<Command *> findDepsForReq(
MemObjRecord *Record,
744 const std::vector<Requirement *> &Req,
747 std::vector<Command *> &ToEnqueue,
748 const bool AddDepsToLeaves =
true);
750 void createGraphForCommand(
Command *NewCmd,
CG &
CG,
bool isInteropTask,
751 std::vector<Requirement *> &Reqs,
752 const std::vector<detail::EventImplPtr> &Events,
754 std::vector<Command *> &ToEnqueue);
764 bool AllowConst =
true);
769 friend class ::MockScheduler;
777 std::vector<Command *> &ToEnqueue);
781 FusionMap::iterator findFusionList(
QueueIdT Id) {
782 return MFusionMap.find(Id);
785 void removeNodeFromGraph(
Command *Node, std::vector<Command *> &ToEnqueue);
789 std::queue<Command *> MCmdsToVisit;
791 std::vector<Command *> MVisitedCmds;
801 void printGraphAsDot(
const char *ModeName);
813 std::array<bool, PrintOptions::Size> MPrintOptionsArray{
false};
903 std::vector<Command *> &ToCleanUp,
904 bool LockTheLock =
true,
bool *Success =
nullptr);
917 std::vector<Command *> &ToCleanUp,
958 std::unordered_map<EventImplPtr, std::vector<std::shared_ptr<const void>>>
968 friend class ::MockScheduler;
971 static void printFusionWarning(
const std::string &Message);
Base class for memory allocation commands.
Base class for all types of command groups.
The Command class represents some action that needs to be performed on one or more memory objects.
The empty command does nothing during enqueue.
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
std::function< void(Command *, Command *, MemObjRecord *, EnqueueListT &)> AllocateDependencyF
void optimize(const EventImplPtr &Event)
[Provisional] Optimizes subgraph that consists of command associated with Event passed and its depend...
Command * addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events, std::vector< Command * > &ToEnqueue)
Adds a command buffer update operation to the execution graph.
void startFusion(QueueImplPtr Queue)
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
std::vector< SYCLMemObjI * > MMemObjs
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
void optimize()
[Provisional] Optimizes the whole graph.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers command group and adds it to the dependency graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
bool isInFusionMode(QueueIdT queue)
Command * connectDepEvent(Command *const Cmd, const EventImplPtr &DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform connection of events in multiple contexts.
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &HostQueue, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
MemObjRecord * getOrInsertMemObjRecord(const QueueImplPtr &Queue, const Requirement *Req, std::vector< Command * > &ToEnqueue)
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void addNodeToLeaves(MemObjRecord *Record, Command *Cmd, access::mode AccessMode, std::vector< Command * > &ToEnqueue)
Adds new command to leaves if needed.
void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue)
Reschedules the command passed using Queue provided.
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
AllocaCommandBase * findAllocaForReq(MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context, bool AllowConst=true)
Searches for suitable alloca in memory record.
DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record)
Finds a command dependency corresponding to the record.
void updateLeaves(const std::set< Command * > &Cmds, MemObjRecord *Record, access::mode AccessMode, std::vector< Command * > &ToCleanUp)
Removes commands from leaves.
Graph Processor provides interfaces for enqueueing commands and their dependencies to the underlying ...
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true, bool *Success=nullptr)
Waits until the command associated with the passed Event is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
static bool handleBlockingCmd(Command *Cmd, EnqueueResultT &EnqueueResult, Command *RootCommand, BlockingT Blocking)
Check if successfully enqueued command is expected to be blocking for the dependent commands before i...
DPC++ graph scheduler class.
void waitForEvent(const EventImplPtr &Event, bool *Success=nullptr)
Waits for the event.
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group that copies the most recent memory to the memory pointed to by the requirement.
bool isDeferredMemObjectsEmpty()
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph that update the requirement with the pointer to the host memory.
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
void cleanupAuxiliaryResources(BlockingT Blocking)
std::unique_lock< RWLockT > WriteLockT
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
EventImplPtr addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events)
Adds a command buffer update operation to the execution graph.
QueueImplPtr getDefaultHostQueue()
std::shared_timed_mutex RWLockT
void cleanupDeferredMemObjects(BlockingT Blocking)
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
std::mutex MDeferredCleanupMutex
bool isInFusionMode(QueueIdT Queue)
GraphBuilder MGraphBuilder
void cancelFusion(QueueImplPtr Queue)
std::shared_lock< RWLockT > ReadLockT
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
void startFusion(QueueImplPtr Queue)
bool checkLeavesCompletion(MemObjRecord *Record)
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represent...
std::mutex MDeferredMemReleaseMutex
static Scheduler & getInstance()
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group and adds it to the dependency graph.
std::mutex MAuxiliaryResourcesMutex
static bool isInstanceAlive()
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
void cleanupCommands(const std::vector< Command * > &Cmds)
const QueueImplPtr & getDefaultHostQueue() const
void NotifyHostTaskCompletion(Command *Cmd)
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
std::vector< Command * > MDeferredCleanupCommands
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
QueueImplPtr DefaultHostQueue
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
Class representing the implementation of command_graph<executable>.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
std::unordered_map< QueueIdT, FusionList > FusionMap
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
std::unique_ptr< KernelFusionCommand > FusionList
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
std::unique_ptr< Command > CommandPtr
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Dependency between two commands.
Result of command enqueueing.
LeavesCollection MWriteLeaves
ContextImplPtr MCurContext
MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency)
std::vector< AllocaCommandBase * > MAllocaCommands
LeavesCollection MReadLeaves