12 #include <sycl/feature_test.hpp>
13 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
30 inline namespace _V1 {
51 #ifdef XPTI_ENABLE_INSTRUMENTATION
53 std::set<Command *> DepCommands;
55 std::vector<Command *> ToCleanUp;
62 "Enqueue process failed.");
63 #ifdef XPTI_ENABLE_INSTRUMENTATION
65 DepCommands.insert(Cmd);
75 "Enqueue process failed.");
76 #ifdef XPTI_ENABLE_INSTRUMENTATION
77 DepCommands.insert(Cmd);
85 Res, ToCleanUp, ReleaseCmd);
88 "Enqueue process failed.");
89 #ifdef XPTI_ENABLE_INSTRUMENTATION
100 std::unique_ptr<detail::CG> CommandGroup,
const QueueImplPtr &Queue,
102 const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies) {
104 const CGType Type = CommandGroup->getType();
105 std::vector<Command *> AuxiliaryCmds;
106 std::vector<std::shared_ptr<const void>> AuxiliaryResources;
107 AuxiliaryResources = CommandGroup->getAuxiliaryResources();
108 CommandGroup->clearAuxiliaryResources();
110 bool ShouldEnqueue =
true;
123 AuxiliaryCmds, EventNeeded);
124 NewCmd = Result.NewCmd;
125 NewEvent = Result.NewEvent;
126 ShouldEnqueue = Result.ShouldEnqueue;
131 std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded,
132 CommandBuffer, std::move(Dependencies));
134 NewCmd = Result.NewCmd;
135 NewEvent = Result.NewEvent;
136 ShouldEnqueue = Result.ShouldEnqueue;
138 NewEvent->setSubmissionTime();
145 if (!AuxiliaryResources.empty())
152 std::vector<Command *> &AuxiliaryCmds,
154 std::vector<Command *> ToCleanUp;
159 (NewEvent) ?
static_cast<Command *
>(NewEvent->getCommand()) :
nullptr;
164 auto CleanUp = [&]() {
165 if (NewCmd && (NewCmd->
MDeps.size() == 0 && NewCmd->
MUsers.size() == 0)) {
167 NewEvent->setCommand(
nullptr);
173 for (
Command *Cmd : AuxiliaryCmds) {
179 "Auxiliary enqueue process failed.");
184 std::rethrow_exception(std::current_exception());
193 NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking);
196 "Enqueue process failed.");
201 std::rethrow_exception(std::current_exception());
209 std::vector<Command *> AuxiliaryCmds;
220 std::vector<Command *> ToCleanUp;
226 for (
Command *Cmd : AuxiliaryCmds) {
230 "Enqueue process failed.");
237 "Enqueue process failed.");
239 auto WorkerQueue = NewCmd->
getEvent()->getWorkerQueue();
240 assert(WorkerQueue &&
"WorkerQueue for CopyBack command must be not null");
241 WorkerQueue->reportAsyncException(std::current_exception());
260 std::vector<Command *> ToCleanUp;
278 if (!Lock.owns_lock())
285 if (!Lock.owns_lock())
295 std::vector<Command *> AuxiliaryCmds;
307 std::vector<Command *> ToCleanUp;
313 for (
Command *Cmd : AuxiliaryCmds) {
317 "Enqueue process failed.");
320 if (
Command *NewCmd =
static_cast<Command *
>(NewCmdEvent->getCommand())) {
325 "Enqueue process failed.");
336 std::vector<Command *> ToCleanUp;
340 assert(BlockedCmd &&
"Can't find appropriate command to unblock");
351 std::vector<Command *> &ToCleanUp) {
353 auto EnqueueLeaves = [&ToCleanUp, &GraphReadLock](
LeavesCollection &Leaves) {
360 "Enqueue process failed.");
369 const std::vector<EventImplPtr> &ToEnqueue,
ReadLockT &GraphReadLock,
370 std::vector<Command *> &ToCleanUp) {
371 for (
auto &Event : ToEnqueue) {
380 "Enqueue process failed.");
420 if (Lock.owns_lock()) {
424 std::vector<Command *> DeferredCleanupCommands;
429 for (
Command *Cmd : DeferredCleanupCommands) {
439 std::copy_if(Cmds.begin(), Cmds.end(),
442 return Cmd->getType() != Command::CommandType::FUSION;
455 std::vector<Command *> ToCleanUp;
457 auto QueueImpl = CmdEvent->getSubmittedQueue();
458 assert(QueueImpl &&
"Submitted queue for host task must not be null");
462 std::vector<DepDesc> Deps = Cmd->
MDeps;
465 ToCleanUp.push_back(Cmd);
471 CmdEvent->setComplete();
475 QueueImpl->revisitUnenqueuedCommandsState(CmdEvent);
497 std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage;
506 std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
510 if (Lock.owns_lock()) {
521 ObjsReadyToRelease.push_back(*MemObjIt);
526 auto ReleaseCandidateIt = ObjsReadyToRelease.begin();
527 while (ReleaseCandidateIt != ObjsReadyToRelease.end()) {
530 ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
532 if (!ObjsReadyToRelease.empty()) {
536 std::make_move_iterator(ObjsReadyToRelease.begin()),
537 std::make_move_iterator(ObjsReadyToRelease.end()));
542 std::unordered_map<
EventImplPtr, std::vector<std::shared_ptr<const void>>>
545 std::vector<std::shared_ptr<const void>> &&Resources) {
546 std::vector<std::shared_ptr<const void>> &StoredResources =
547 AuxiliaryResources[Event];
548 StoredResources.insert(StoredResources.end(),
549 std::make_move_iterator(Resources.begin()),
550 std::make_move_iterator(Resources.end()));
561 std::move(Iter->second));
566 EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources) {
569 std::move(Resources));
578 Event->waitInternal();
580 }
else if (Event->isCompleted())
601 std::vector<Command *> ToEnqueue;
613 [[maybe_unused]]
const std::string &KernelName,
614 [[maybe_unused]] std::vector<unsigned char> &SpecConstBlob) {
615 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
617 Queue, BinImage, KernelName, SpecConstBlob);
620 "Materialization of spec constants not supported by this build");
627 std::vector<Command *> ToEnqueue;
644 void Scheduler::printFusionWarning(
const std::string &Message) {
646 std::cerr <<
"WARNING: " << Message <<
"\n";
650 KernelFusionCommand *Scheduler::isPartOfActiveFusion(Command *Cmd) {
651 auto CmdType = Cmd->getType();
654 auto *FusionCmd =
static_cast<KernelFusionCommand *
>(Cmd);
655 return (FusionCmd->isActive()) ? FusionCmd :
nullptr;
658 auto *CGCmd =
static_cast<ExecCGCommand *
>(Cmd);
659 return (CGCmd->MFusionCmd && CGCmd->MFusionCmd->isActive())
670 std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
672 const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
673 std::vector<detail::EventImplPtr> &Events) {
674 std::vector<Command *> AuxiliaryCmds;
681 Graph, Nodes, Queue, Requirements, Events, AuxiliaryCmds);
687 std::vector<Command *> ToCleanUp;
693 for (
Command *Cmd : AuxiliaryCmds) {
697 "Enqueue process failed.");
700 if (
Command *NewCmd =
static_cast<Command *
>(NewCmdEvent->getCommand())) {
705 "Enqueue process failed.");
719 if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
722 if (SyclEventImplPtr->isHost()) {
723 return SyclEventImplPtr->isCompleted();
726 if (SyclEventImplPtr->getContextImpl() != Context)
731 return SyclEventImplPtr->getHandleRef() !=
nullptr;
735 const std::vector<sycl::event> &DepEvents,
ContextImplPtr Context) {
738 DepEvents.begin(), DepEvents.end(), [&Context](
const sycl::event &Event) {
739 const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
740 return CheckEventReadiness(Context, SyclEventImplPtr);
745 const std::vector<EventImplPtr> &DepEvents,
ContextImplPtr Context) {
747 return std::all_of(DepEvents.begin(), DepEvents.end(),
749 return CheckEventReadiness(Context, SyclEventImplPtr);
detail::SYCLMemObjI * MSYCLMemObj
Base class for memory allocation commands.
ReleaseCommand * getReleaseCmd()
The Command class represents some action that needs to be performed on one or more memory objects.
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dep...
std::mutex MBlockedUsersMutex
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
std::vector< DepDesc > MDeps
Contains list of dependencies(edges)
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depends on the host command explicitly (by depends_on).
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
const EventImplPtr & getEvent() const
CommandType getType() const
bool isSchedulerAlive() const
Scheduler & getScheduler()
static GlobalHandler & instance()
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
std::shared_ptr< MemObjRecord > MRecord
Command * addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events, std::vector< Command * > &ToEnqueue)
Adds a command buffer update operation to the execution graph.
void startFusion(QueueImplPtr Queue)
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
bool isInFusionMode(QueueIdT queue)
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers command group and adds it to the dependency graph.
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true, bool *Success=nullptr)
Waits for the command, associated with Event passed, is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
DPC++ graph scheduler class.
void waitForEvent(const EventImplPtr &Event, bool *Success=nullptr)
Waits for the event.
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group, that copies most recent memory to the memory pointed by the requirement.
bool isDeferredMemObjectsEmpty()
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group, and adds it to the dependency graph.
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph, that update the requirement with the pointer to the host memory.
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
void cleanupAuxiliaryResources(BlockingT Blocking)
std::unique_lock< RWLockT > WriteLockT
sycl::detail::pi::PiKernel completeSpecConstMaterialization(QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, std::vector< unsigned char > &SpecConstBlob)
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
EventImplPtr addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events)
Adds a command buffer update operation to the execution graph.
void cleanupDeferredMemObjects(BlockingT Blocking)
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
std::mutex MDeferredCleanupMutex
bool isInFusionMode(QueueIdT Queue)
GraphBuilder MGraphBuilder
void cancelFusion(QueueImplPtr Queue)
std::shared_lock< RWLockT > ReadLockT
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
void startFusion(QueueImplPtr Queue)
bool checkLeavesCompletion(MemObjRecord *Record)
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represent...
std::mutex MDeferredMemReleaseMutex
static Scheduler & getInstance()
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
std::mutex MAuxiliaryResourcesMutex
static bool isInstanceAlive()
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
void cleanupCommands(const std::vector< Command * > &Cmds)
void NotifyHostTaskCompletion(Command *Cmd)
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
std::vector< Command * > MDeferredCleanupCommands
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
sycl::detail::pi::PiKernel materializeSpecConstants(QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, const std::vector< unsigned char > &SpecConstBlob)
static jit_compiler & get_instance()
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Class representing the implementation of command_graph<executable>.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
bool CheckEventReadiness(const ContextImplPtr &Context, const EventImplPtr &SyclEventImplPtr)
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
static void registerAuxiliaryResourcesNoLock(std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void >>> &AuxiliaryResources, const EventImplPtr &Event, std::vector< std::shared_ptr< const void >> &&Resources)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
std::shared_ptr< event_impl > EventImplPtr
CGType
Type of the command group.
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
Result of command enqueueing.
ResultT MResult
Indicates the result of enqueueing.
LeavesCollection MWriteLeaves
std::vector< AllocaCommandBase * > MAllocaCommands
LeavesCollection MReadLeaves