20 #include <shared_mutex>
21 #include <unordered_map>
22 #include <unordered_set>
175 inline namespace _V1 {
176 namespace ext::oneapi::experimental::detail {
177 class exec_graph_impl;
184 class DispatchHostTask;
187 using EventImplPtr = std::shared_ptr<detail::event_impl>;
188 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
191 using QueueIdT = std::hash<std::shared_ptr<detail::queue_impl>>::result_type;
194 using FusionMap = std::unordered_map<QueueIdT, FusionList>;
205 :
MReadLeaves{this, LeafLimit, AllocateDependency},
384 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
467 const std::string &KernelName, std::vector<unsigned char> &SpecConstBlob);
475 std::vector<Command *> &AuxilaryCmds,
487 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
489 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
490 std::vector<detail::EventImplPtr> &Events);
509 while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
513 std::this_thread::yield();
528 while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
532 std::this_thread::yield();
556 std::vector<Command *> &ToCleanUp);
561 std::vector<Command *> &ToCleanUp);
577 EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources);
602 std::unique_ptr<detail::CG> CommandGroup,
const QueueImplPtr &Queue,
603 std::vector<Command *> &ToEnqueue,
bool EventNeeded,
605 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
612 std::vector<Command *> &ToEnqueue);
623 std::vector<Command *> &ToEnqueue);
663 std::vector<Command *> &ToEnqueue);
668 std::vector<Command *> &ToCleanUp);
681 std::vector<Command *> &ToCleanUp);
692 std::vector<Command *> &ToEnqueue,
708 std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
710 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
711 std::vector<detail::EventImplPtr> &Events,
712 std::vector<Command *> &ToEnqueue);
728 std::vector<Command *> &ToEnqueue);
734 std::vector<Command *> &ToEnqueue);
739 std::vector<Command *> &ToEnqueue);
742 std::set<Command *> findDepsForReq(
MemObjRecord *Record,
747 const std::vector<Requirement *> &Req,
749 std::vector<Command *> &ToEnqueue);
751 void createGraphForCommand(
Command *NewCmd,
CG &
CG,
bool isInteropTask,
752 std::vector<Requirement *> &Reqs,
753 const std::vector<detail::EventImplPtr> &Events,
755 std::vector<Command *> &ToEnqueue);
765 bool AllowConst =
true);
770 friend class ::MockScheduler;
778 std::vector<Command *> &ToEnqueue);
782 FusionMap::iterator findFusionList(
QueueIdT Id) {
783 return MFusionMap.find(Id);
786 void removeNodeFromGraph(
Command *Node, std::vector<Command *> &ToEnqueue);
790 std::queue<Command *> MCmdsToVisit;
792 std::vector<Command *> MVisitedCmds;
802 void printGraphAsDot(
const char *ModeName);
814 std::array<bool, PrintOptions::Size> MPrintOptionsArray{
false};
904 std::vector<Command *> &ToCleanUp,
905 bool LockTheLock =
true,
bool *Success =
nullptr);
918 std::vector<Command *> &ToCleanUp,
959 std::unordered_map<EventImplPtr, std::vector<std::shared_ptr<const void>>>
967 friend class ::MockScheduler;
970 static void printFusionWarning(
const std::string &Message);
Base class for memory allocation commands.
Base class for all types of command groups.
The Command class represents some action that needs to be performed on one or more memory objects.
The empty command does nothing during enqueue.
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
A wrapper for the CircularBuffer class along with a collection for host accessors' EmptyCommands.
std::function< void(Command *, Command *, MemObjRecord *, EnqueueListT &)> AllocateDependencyF
void optimize(const EventImplPtr &Event)
[Provisional] Optimizes subgraph that consists of command associated with Event passed and its depend...
Command * addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events, std::vector< Command * > &ToEnqueue)
Adds a command buffer update operation to the execution graph.
void startFusion(QueueImplPtr Queue)
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
MemObjRecord * getOrInsertMemObjRecord(const QueueImplPtr &Queue, const Requirement *Req)
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
std::vector< SYCLMemObjI * > MMemObjs
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
void optimize()
[Provisional] Optimizes the whole graph.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
bool isInFusionMode(QueueIdT queue)
Command * connectDepEvent(Command *const Cmd, const EventImplPtr &DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform connection of events in multiple contexts.
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group and adds it to the dependency graph.
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void addNodeToLeaves(MemObjRecord *Record, Command *Cmd, access::mode AccessMode, std::vector< Command * > &ToEnqueue)
Adds new command to leaves if needed.
void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue)
Reschedules the passed command using the provided Queue.
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
AllocaCommandBase * findAllocaForReq(MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context, bool AllowConst=true)
Searches for suitable alloca in memory record.
DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record)
Finds a command dependency corresponding to the record.
void updateLeaves(const std::set< Command * > &Cmds, MemObjRecord *Record, access::mode AccessMode, std::vector< Command * > &ToCleanUp)
Removes commands from leaves.
Graph Processor provides interfaces for enqueueing commands and their dependencies to the underlying ...
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true, bool *Success=nullptr)
Waits until the command associated with the passed Event is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
static bool handleBlockingCmd(Command *Cmd, EnqueueResultT &EnqueueResult, Command *RootCommand, BlockingT Blocking)
Check if successfully enqueued command is expected to be blocking for the dependent commands before i...
DPC++ graph scheduler class.
void waitForEvent(const EventImplPtr &Event, bool *Success=nullptr)
Waits for the event.
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group that copies the most recent memory to the memory pointed to by the requirement.
bool isDeferredMemObjectsEmpty()
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group and adds it to the dependency graph.
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph that update the requirement with the pointer to the host memory.
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
void cleanupAuxiliaryResources(BlockingT Blocking)
std::unique_lock< RWLockT > WriteLockT
sycl::detail::pi::PiKernel completeSpecConstMaterialization(QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, std::vector< unsigned char > &SpecConstBlob)
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
EventImplPtr addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events)
Adds a command buffer update operation to the execution graph.
std::shared_timed_mutex RWLockT
void cleanupDeferredMemObjects(BlockingT Blocking)
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
std::mutex MDeferredCleanupMutex
bool isInFusionMode(QueueIdT Queue)
GraphBuilder MGraphBuilder
void cancelFusion(QueueImplPtr Queue)
std::shared_lock< RWLockT > ReadLockT
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
void startFusion(QueueImplPtr Queue)
bool checkLeavesCompletion(MemObjRecord *Record)
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represent...
std::mutex MDeferredMemReleaseMutex
static Scheduler & getInstance()
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
std::mutex MAuxiliaryResourcesMutex
static bool isInstanceAlive()
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
void cleanupCommands(const std::vector< Command * > &Cmds)
void NotifyHostTaskCompletion(Command *Cmd)
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
std::vector< Command * > MDeferredCleanupCommands
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
Class representing the implementation of command_graph<executable>.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
std::unordered_map< QueueIdT, FusionList > FusionMap
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
std::unique_ptr< KernelFusionCommand > FusionList
std::shared_ptr< detail::stream_impl > StreamImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
std::unique_ptr< Command > CommandPtr
class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Dependency between two commands.
Result of command enqueueing.
LeavesCollection MWriteLeaves
ContextImplPtr MCurContext
MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency)
std::vector< AllocaCommandBase * > MAllocaCommands
LeavesCollection MReadLeaves