DPC++ Runtime
Runtime libraries for oneAPI DPC++
scheduler.cpp
Go to the documentation of this file.
1 //===-- scheduler.cpp - SYCL Scheduler --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include <detail/graph_impl.hpp>
12 #include <detail/queue_impl.hpp>
14 #include <detail/stream_impl.hpp>
15 #include <sycl/device_selector.hpp>
16 
17 #include <chrono>
18 #include <cstdio>
19 #include <memory>
20 #include <mutex>
21 #include <set>
22 #include <thread>
23 #include <vector>
24 
25 namespace sycl {
26 inline namespace _V1 {
27 namespace detail {
28 
30  for (Command *Cmd : Record->MReadLeaves) {
31  if (!(Cmd->getType() == detail::Command::ALLOCA ||
33  !Cmd->getEvent()->isCompleted())
34  return false;
35  }
36  for (Command *Cmd : Record->MWriteLeaves) {
37  if (!(Cmd->getType() == detail::Command::ALLOCA ||
39  !Cmd->getEvent()->isCompleted())
40  return false;
41  }
42  return true;
43 }
44 
46  ReadLockT &GraphReadLock) {
47 #ifdef XPTI_ENABLE_INSTRUMENTATION
48  // Will contain the list of dependencies for the Release Command
49  std::set<Command *> DepCommands;
50 #endif
51  std::vector<Command *> ToCleanUp;
52  for (Command *Cmd : Record->MReadLeaves) {
53  EnqueueResultT Res;
54  bool Enqueued =
55  GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
56  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
58  "Enqueue process failed.");
59 #ifdef XPTI_ENABLE_INSTRUMENTATION
60  // Capture the dependencies
61  DepCommands.insert(Cmd);
62 #endif
63  GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
64  }
65  for (Command *Cmd : Record->MWriteLeaves) {
66  EnqueueResultT Res;
67  bool Enqueued =
68  GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
69  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
71  "Enqueue process failed.");
72 #ifdef XPTI_ENABLE_INSTRUMENTATION
73  DepCommands.insert(Cmd);
74 #endif
75  GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
76  }
77  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
78  Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
79  EnqueueResultT Res;
80  bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, GraphReadLock,
81  Res, ToCleanUp, ReleaseCmd);
82  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
84  "Enqueue process failed.");
85 #ifdef XPTI_ENABLE_INSTRUMENTATION
86  // Report these dependencies to the Command so these dependencies can be
87  // reported as edges
88  ReleaseCmd->resolveReleaseDependencies(DepCommands);
89 #endif
90  GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock,
91  ToCleanUp);
92  }
93 }
94 
96  std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
97  bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer,
98  const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies) {
99  EventImplPtr NewEvent = nullptr;
100  const CGType Type = CommandGroup->getType();
101  std::vector<Command *> AuxiliaryCmds;
102  std::vector<std::shared_ptr<const void>> AuxiliaryResources;
103  AuxiliaryResources = CommandGroup->getAuxiliaryResources();
104  CommandGroup->clearAuxiliaryResources();
105 
106  bool ShouldEnqueue = true;
107  {
108  WriteLockT Lock = acquireWriteLock();
109 
110  Command *NewCmd = nullptr;
111  switch (Type) {
112  case CGType::UpdateHost:
113  NewCmd =
114  MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds);
115  NewEvent = NewCmd->getEvent();
116  break;
118  auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr,
119  AuxiliaryCmds, EventNeeded);
120  NewCmd = Result.NewCmd;
121  NewEvent = Result.NewEvent;
122  ShouldEnqueue = Result.ShouldEnqueue;
123  break;
124  }
125  default:
126  auto Result = MGraphBuilder.addCG(
127  std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded,
128  CommandBuffer, std::move(Dependencies));
129 
130  NewCmd = Result.NewCmd;
131  NewEvent = Result.NewEvent;
132  ShouldEnqueue = Result.ShouldEnqueue;
133  }
134  NewEvent->setSubmissionTime();
135  }
136 
137  if (ShouldEnqueue) {
138  enqueueCommandForCG(NewEvent, AuxiliaryCmds);
139  }
140 
141  if (!AuxiliaryResources.empty())
142  registerAuxiliaryResources(NewEvent, std::move(AuxiliaryResources));
143 
144  return NewEvent;
145 }
146 
148  std::vector<Command *> &AuxiliaryCmds,
149  BlockingT Blocking) {
150  std::vector<Command *> ToCleanUp;
151  {
152  ReadLockT Lock = acquireReadLock();
153 
154  Command *NewCmd =
155  (NewEvent) ? static_cast<Command *>(NewEvent->getCommand()) : nullptr;
156 
157  EnqueueResultT Res;
158  bool Enqueued;
159 
160  auto CleanUp = [&]() {
161  if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
162  if (NewEvent) {
163  NewEvent->setCommand(nullptr);
164  }
165  delete NewCmd;
166  }
167  };
168 
169  for (Command *Cmd : AuxiliaryCmds) {
170  Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd,
171  Blocking);
172  try {
173  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
175  "Auxiliary enqueue process failed.");
176  } catch (...) {
177  // enqueueCommand() func and if statement above may throw an exception,
178  // so destroy required resources to avoid memory leak
179  CleanUp();
180  std::rethrow_exception(std::current_exception());
181  }
182  }
183 
184  if (NewCmd) {
185  // TODO: Check if lazy mode.
186  EnqueueResultT Res;
187  try {
188  bool Enqueued = GraphProcessor::enqueueCommand(
189  NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking);
190  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
192  "Enqueue process failed.");
193  } catch (...) {
194  // enqueueCommand() func and if statement above may throw an exception,
195  // so destroy required resources to avoid memory leak
196  CleanUp();
197  std::rethrow_exception(std::current_exception());
198  }
199  }
200  }
201  cleanupCommands(ToCleanUp);
202 }
203 
205  std::vector<Command *> AuxiliaryCmds;
206  Command *NewCmd = nullptr;
207  {
208  WriteLockT Lock = acquireWriteLock();
209  NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
210  // Command was not created because there were no operations with
211  // buffer.
212  if (!NewCmd)
213  return nullptr;
214  }
215 
216  std::vector<Command *> ToCleanUp;
217  try {
218  ReadLockT Lock = acquireReadLock();
219  EnqueueResultT Res;
220  bool Enqueued;
221 
222  for (Command *Cmd : AuxiliaryCmds) {
223  Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
224  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
226  "Enqueue process failed.");
227  }
228 
229  Enqueued =
230  GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
231  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
233  "Enqueue process failed.");
234  } catch (...) {
235  auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue();
236  assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null");
237  WorkerQueue->reportAsyncException(std::current_exception());
238  }
239  EventImplPtr NewEvent = NewCmd->getEvent();
240  cleanupCommands(ToCleanUp);
241  return NewEvent;
242 }
243 
246 }
247 
250 }
251 
252 void Scheduler::waitForEvent(const EventImplPtr &Event, bool *Success) {
253  ReadLockT Lock = acquireReadLock();
254  // It's fine to leave the lock unlocked upon return from waitForEvent as
255  // there's no more actions to do here with graph
256  std::vector<Command *> ToCleanUp;
257  GraphProcessor::waitForEvent(std::move(Event), Lock, ToCleanUp,
258  /*LockTheLock=*/false, Success);
259  cleanupCommands(ToCleanUp);
260 }
261 
263  bool StrictLock) {
264  MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
265  if (!Record)
266  // No operations were performed on the mem object
267  return true;
268 
269  {
270  // This only needs a shared mutex as it only involves enqueueing and
271  // awaiting for events
272  ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock)
273  : ReadLockT(MGraphLock, std::try_to_lock);
274  if (!Lock.owns_lock())
275  return false;
276  waitForRecordToFinish(Record, Lock);
277  }
278  {
279  WriteLockT Lock = StrictLock ? acquireWriteLock()
280  : WriteLockT(MGraphLock, std::try_to_lock);
281  if (!Lock.owns_lock())
282  return false;
286  }
287  return true;
288 }
289 
291  std::vector<Command *> AuxiliaryCmds;
292  EventImplPtr NewCmdEvent = nullptr;
293 
294  {
295  WriteLockT Lock = acquireWriteLock();
296 
297  Command *NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
298  if (!NewCmd)
299  return nullptr;
300  NewCmdEvent = NewCmd->getEvent();
301  }
302 
303  std::vector<Command *> ToCleanUp;
304  {
305  ReadLockT Lock = acquireReadLock();
306  EnqueueResultT Res;
307  bool Enqueued;
308 
309  for (Command *Cmd : AuxiliaryCmds) {
310  Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
311  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
313  "Enqueue process failed.");
314  }
315 
316  if (Command *NewCmd = static_cast<Command *>(NewCmdEvent->getCommand())) {
317  Enqueued =
318  GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
319  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
321  "Enqueue process failed.");
322  }
323  }
324 
325  cleanupCommands(ToCleanUp);
326  return NewCmdEvent;
327 }
328 
330  Command *const BlockedCmd = Req->MBlockedCmd;
331 
332  std::vector<Command *> ToCleanUp;
333  {
334  ReadLockT Lock = acquireReadLock();
335 
336  assert(BlockedCmd && "Can't find appropriate command to unblock");
337 
339 
340  enqueueLeavesOfReqUnlocked(Req, Lock, ToCleanUp);
341  }
342  cleanupCommands(ToCleanUp);
343 }
344 
346  ReadLockT &GraphReadLock,
347  std::vector<Command *> &ToCleanUp) {
348  MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get();
349  auto EnqueueLeaves = [&ToCleanUp, &GraphReadLock](LeavesCollection &Leaves) {
350  for (Command *Cmd : Leaves) {
351  EnqueueResultT Res;
352  bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res,
353  ToCleanUp, Cmd);
354  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
356  "Enqueue process failed.");
357  }
358  };
359 
360  EnqueueLeaves(Record->MReadLeaves);
361  EnqueueLeaves(Record->MWriteLeaves);
362 }
363 
365  const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
366  std::vector<Command *> &ToCleanUp) {
367  for (auto &Event : ToEnqueue) {
368  Command *Cmd = static_cast<Command *>(Event->getCommand());
369  if (!Cmd)
370  continue;
371  EnqueueResultT Res;
372  bool Enqueued =
373  GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
374  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
376  "Enqueue process failed.");
377  }
378 }
379 
381  // There might be some commands scheduled for post enqueue cleanup that
382  // haven't been freed because of the graph mutex being locked at the time,
383  // clean them up now.
384  cleanupCommands({});
385 
386  cleanupAuxiliaryResources(Blocking);
387  // We need loop since sometimes we may need new objects to be added to
388  // deferred mem objects storage during cleanup. Known example is: we cleanup
389  // existing deferred mem objects under write lock, during this process we
390  // cleanup commands related to this record, command may have last reference to
391  // queue_impl, ~queue_impl is called and buffer for assert (which is created
392  // with size only so all confitions for deferred release are satisfied) is
393  // added to deferred mem obj storage. So we may end up with leak.
394  do {
395  cleanupDeferredMemObjects(Blocking);
396  } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty());
397 }
398 
400  return Req->MSYCLMemObj->MRecord.get();
401 }
402 
403 void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
406 
407  if (Cmds.empty()) {
408  std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
409  if (MDeferredCleanupCommands.empty())
410  return;
411  }
412 
413  WriteLockT Lock(MGraphLock, std::try_to_lock);
414  // In order to avoid deadlocks related to blocked commands, defer cleanup if
415  // the lock wasn't acquired.
416  if (Lock.owns_lock()) {
417  for (Command *Cmd : Cmds) {
419  }
420  std::vector<Command *> DeferredCleanupCommands;
421  {
422  std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
423  std::swap(DeferredCleanupCommands, MDeferredCleanupCommands);
424  }
425  for (Command *Cmd : DeferredCleanupCommands) {
427  }
428 
429  } else {
430  std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
431  // Full cleanup for fusion placeholder commands is handled by the entry
432  // points for fusion (start_fusion, ...). To avoid double free or access to
433  // objects after their lifetime, fusion commands should therefore never be
434  // added to the deferred command list.
435  std::copy_if(Cmds.begin(), Cmds.end(),
436  std::back_inserter(MDeferredCleanupCommands),
437  [](const Command *Cmd) {
438  return Cmd->getType() != Command::CommandType::FUSION;
439  });
440  }
441 }
442 
444  // Completing command's event along with unblocking enqueue readiness of
445  // empty command may lead to quick deallocation of MThisCmd by some cleanup
446  // process. Thus we'll copy deps prior to completing of event and unblocking
447  // of empty command.
448  // Also, it's possible to have record deallocated prior to enqueue process.
449  // Thus we employ read-lock of graph.
450 
451  std::vector<Command *> ToCleanUp;
452  auto CmdEvent = Cmd->getEvent();
453  auto QueueImpl = CmdEvent->getSubmittedQueue();
454  assert(QueueImpl && "Submitted queue for host task must not be null");
455  {
456  ReadLockT Lock = acquireReadLock();
457 
458  std::vector<DepDesc> Deps = Cmd->MDeps;
459  // Host tasks are cleaned up upon completion rather than enqueuing.
460  if (Cmd->MLeafCounter == 0) {
461  ToCleanUp.push_back(Cmd);
462  Cmd->MMarkedForCleanup = true;
463  }
464  {
465  std::lock_guard<std::mutex> Guard(Cmd->MBlockedUsersMutex);
466  // update self-event status
467  CmdEvent->setComplete();
468  }
469  Scheduler::enqueueUnblockedCommands(Cmd->MBlockedUsers, Lock, ToCleanUp);
470  }
471  QueueImpl->revisitUnenqueuedCommandsState(CmdEvent);
472 
473  cleanupCommands(ToCleanUp);
474 }
475 
476 void Scheduler::deferMemObjRelease(const std::shared_ptr<SYCLMemObjI> &MemObj) {
477  {
478  std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
479  MDeferredMemObjRelease.push_back(MemObj);
480  }
482 }
483 
485  std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
486  return MDeferredMemObjRelease.empty();
487 }
488 
491  return;
492  if (Blocking == BlockingT::BLOCKING) {
493  std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage;
494  {
495  std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
496  MDeferredMemObjRelease.swap(TempStorage);
497  }
498  // if any objects in TempStorage exist - it is leaving scope and being
499  // deleted
500  }
501 
502  std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
503  {
504  // Lock is needed for checkLeavesCompletion - if walks through Record leaves
505  ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock);
506  if (Lock.owns_lock()) {
507  // Not expected that Blocking == true will be used in parallel with
508  // adding MemObj to storage, no such scenario.
509  std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
510  auto MemObjIt = MDeferredMemObjRelease.begin();
511  while (MemObjIt != MDeferredMemObjRelease.end()) {
512  MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get());
513  if (!checkLeavesCompletion(Record)) {
514  MemObjIt++;
515  continue;
516  }
517  ObjsReadyToRelease.push_back(*MemObjIt);
518  MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
519  }
520  }
521  }
522  auto ReleaseCandidateIt = ObjsReadyToRelease.begin();
523  while (ReleaseCandidateIt != ObjsReadyToRelease.end()) {
524  if (!removeMemoryObject(ReleaseCandidateIt->get(), false))
525  break;
526  ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
527  }
528  if (!ObjsReadyToRelease.empty()) {
529  std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
530  MDeferredMemObjRelease.insert(
532  std::make_move_iterator(ObjsReadyToRelease.begin()),
533  std::make_move_iterator(ObjsReadyToRelease.end()));
534  }
535 }
536 
538  std::unordered_map<EventImplPtr, std::vector<std::shared_ptr<const void>>>
539  &AuxiliaryResources,
540  const EventImplPtr &Event,
541  std::vector<std::shared_ptr<const void>> &&Resources) {
542  std::vector<std::shared_ptr<const void>> &StoredResources =
543  AuxiliaryResources[Event];
544  StoredResources.insert(StoredResources.end(),
545  std::make_move_iterator(Resources.begin()),
546  std::make_move_iterator(Resources.end()));
547 }
548 
550  const EventImplPtr &Src) {
551  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
552  auto Iter = MAuxiliaryResources.find(Src);
553  if (Iter == MAuxiliaryResources.end()) {
554  return;
555  }
557  std::move(Iter->second));
558  MAuxiliaryResources.erase(Iter);
559 }
560 
562  EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources) {
563  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
565  std::move(Resources));
566 }
567 
569  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
570  for (auto It = MAuxiliaryResources.begin();
571  It != MAuxiliaryResources.end();) {
572  const EventImplPtr &Event = It->first;
573  if (Blocking == BlockingT::BLOCKING) {
574  Event->waitInternal();
575  It = MAuxiliaryResources.erase(It);
576  } else if (Event->isCompleted())
577  It = MAuxiliaryResources.erase(It);
578  else
579  ++It;
580  }
581 }
582 
584  WriteLockT Lock = acquireWriteLock();
585  WriteLockT FusionMapLock = acquireFusionWriteLock();
586  MGraphBuilder.startFusion(Queue);
587 }
588 
589 void Scheduler::cleanUpCmdFusion(sycl::detail::queue_impl *Queue) {
590  // No graph lock, we might be called because the graph builder is releasing
591  // resources.
592  WriteLockT FusionMapLock = acquireFusionWriteLock();
594 }
595 
597  std::vector<Command *> ToEnqueue;
598  {
599  WriteLockT Lock = acquireWriteLock();
600  WriteLockT FusionMapLock = acquireFusionWriteLock();
601  MGraphBuilder.cancelFusion(Queue, ToEnqueue);
602  }
603  enqueueCommandForCG(nullptr, ToEnqueue);
604 }
605 
607  const property_list &PropList) {
608  std::vector<Command *> ToEnqueue;
609  EventImplPtr FusedEvent;
610  {
611  WriteLockT Lock = acquireWriteLock();
612  WriteLockT FusionMapLock = acquireFusionWriteLock();
613  FusedEvent = MGraphBuilder.completeFusion(Queue, ToEnqueue, PropList);
614  }
615  enqueueCommandForCG(nullptr, ToEnqueue);
616 
617  return FusedEvent;
618 }
619 
623 }
624 
// Prints a fusion-related warning to stderr.
// NOTE(review): the guard line (original line 626) is missing from this
// garbled listing — upstream appears to gate the warning on a runtime
// warning-level config; verify against the original source before use.
625 void Scheduler::printFusionWarning(const std::string &Message) {
627  std::cerr << "WARNING: " << Message << "\n";
628  }
629 }
630 
631 KernelFusionCommand *Scheduler::isPartOfActiveFusion(Command *Cmd) {
632  auto CmdType = Cmd->getType();
633  switch (CmdType) {
634  case Command::FUSION: {
635  auto *FusionCmd = static_cast<KernelFusionCommand *>(Cmd);
636  return (FusionCmd->isActive()) ? FusionCmd : nullptr;
637  }
638  case Command::RUN_CG: {
639  auto *CGCmd = static_cast<ExecCGCommand *>(Cmd);
640  return (CGCmd->MFusionCmd && CGCmd->MFusionCmd->isActive())
641  ? CGCmd->MFusionCmd
642  : nullptr;
643  }
644  default:
645  return nullptr;
646  }
647 }
648 
651  std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
652  Nodes,
653  const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
654  std::vector<detail::EventImplPtr> &Events) {
655  std::vector<Command *> AuxiliaryCmds;
656  EventImplPtr NewCmdEvent = nullptr;
657 
658  {
659  WriteLockT Lock = acquireWriteLock();
660 
662  Graph, Nodes, Queue, Requirements, Events, AuxiliaryCmds);
663  if (!NewCmd)
664  return nullptr;
665  NewCmdEvent = NewCmd->getEvent();
666  }
667 
668  std::vector<Command *> ToCleanUp;
669  {
670  ReadLockT Lock = acquireReadLock();
671  EnqueueResultT Res;
672  bool Enqueued;
673 
674  for (Command *Cmd : AuxiliaryCmds) {
675  Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
676  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
678  "Enqueue process failed.");
679  }
680 
681  if (Command *NewCmd = static_cast<Command *>(NewCmdEvent->getCommand())) {
682  Enqueued =
683  GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
684  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
686  "Enqueue process failed.");
687  }
688  }
689 
690  cleanupCommands(ToCleanUp);
691  return NewCmdEvent;
692 }
693 
695  const EventImplPtr &SyclEventImplPtr) {
696  // Events that don't have an initialized context are throwaway events that
697  // don't represent actual dependencies. Calling getContextImpl() would set
698  // their context, which we wish to avoid as it is expensive.
699  // NOP events also don't represent actual dependencies.
700  if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
701  return true;
702  }
703  if (SyclEventImplPtr->isHost()) {
704  return SyclEventImplPtr->isCompleted();
705  }
706  // Cross-context dependencies can't be passed to the backend directly.
707  if (SyclEventImplPtr->getContextImpl() != Context)
708  return false;
709 
710  // A nullptr here means that the commmand does not produce a PI event or it
711  // hasn't been enqueued yet.
712  return SyclEventImplPtr->getHandleRef() != nullptr;
713 }
714 
716  const std::vector<sycl::event> &DepEvents, ContextImplPtr Context) {
717 
718  return std::all_of(
719  DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) {
720  const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
721  return CheckEventReadiness(Context, SyclEventImplPtr);
722  });
723 }
724 
726  const std::vector<EventImplPtr> &DepEvents, ContextImplPtr Context) {
727 
728  return std::all_of(DepEvents.begin(), DepEvents.end(),
729  [&Context](const EventImplPtr &SyclEventImplPtr) {
730  return CheckEventReadiness(Context, SyclEventImplPtr);
731  });
732 }
733 
734 } // namespace detail
735 } // namespace _V1
736 } // namespace sycl
Base class for memory allocation commands.
Definition: commands.hpp:453
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:106
void resolveReleaseDependencies(std::set< Command * > &list)
Looks at all the dependencies for the release command and enables instrumentation to report these dependencies as edges.
Definition: commands.cpp:949
bool MMarkedForCleanup
Indicates that the node will be freed by graph cleanup.
Definition: commands.hpp:386
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
Definition: commands.hpp:321
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
Definition: commands.hpp:317
std::vector< DepDesc > MDeps
Contains list of dependencies(edges)
Definition: commands.hpp:315
std::vector< EventImplPtr > MBlockedUsers
Contains list of commands that depends on the host command explicitly (by depends_on).
Definition: commands.hpp:393
std::atomic< EnqueueResultT::ResultT > MEnqueueStatus
Describes the status of the command.
Definition: commands.hpp:338
const EventImplPtr & getEvent() const
Definition: commands.hpp:179
CommandType getType() const
Definition: commands.hpp:143
static GlobalHandler & instance()
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
std::shared_ptr< MemObjRecord > MRecord
Command * addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events, std::vector< Command * > &ToEnqueue)
Adds a command buffer update operation to the execution graph.
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers command group and adds it to the dependency graph.
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true, bool *Success=nullptr)
Waits for the command, associated with Event passed, is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
DPC++ graph scheduler class.
Definition: scheduler.hpp:366
void waitForEvent(const EventImplPtr &Event, bool *Success=nullptr)
Waits for the event.
Definition: scheduler.cpp:252
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
Definition: scheduler.hpp:545
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group, that copies most recent memory to the memory pointed by the requirement.
Definition: scheduler.cpp:204
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
Definition: scheduler.cpp:364
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
Definition: scheduler.hpp:541
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group, and adds it to the dependency graph.
Definition: scheduler.cpp:95
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph, that update the requirement with the pointer to the host memory.
Definition: scheduler.cpp:290
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
Definition: scheduler.hpp:957
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
Definition: scheduler.cpp:561
void cleanupAuxiliaryResources(BlockingT Blocking)
Definition: scheduler.cpp:568
std::unique_lock< RWLockT > WriteLockT
Definition: scheduler.hpp:499
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
Definition: scheduler.cpp:606
EventImplPtr addCommandGraphUpdate(ext::oneapi::experimental::detail::exec_graph_impl *Graph, std::vector< std::shared_ptr< ext::oneapi::experimental::detail::node_impl >> Nodes, const QueueImplPtr &Queue, std::vector< Requirement * > Requirements, std::vector< detail::EventImplPtr > &Events)
Adds a command buffer update operation to the execution graph.
Definition: scheduler.cpp:649
void cleanupDeferredMemObjects(BlockingT Blocking)
Definition: scheduler.cpp:489
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
Definition: scheduler.cpp:345
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
Definition: scheduler.cpp:147
bool isInFusionMode(QueueIdT Queue)
Definition: scheduler.cpp:620
void cancelFusion(QueueImplPtr Queue)
Definition: scheduler.cpp:596
std::shared_lock< RWLockT > ReadLockT
Definition: scheduler.hpp:498
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
Definition: scheduler.hpp:953
void startFusion(QueueImplPtr Queue)
Definition: scheduler.cpp:583
bool checkLeavesCompletion(MemObjRecord *Record)
Definition: scheduler.cpp:29
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
Definition: scheduler.cpp:399
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
Definition: scheduler.cpp:329
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represented by the given record.
Definition: scheduler.cpp:45
static Scheduler & getInstance()
Definition: scheduler.cpp:244
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Definition: scheduler.cpp:589
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
Definition: scheduler.cpp:549
void cleanupCommands(const std::vector< Command * > &Cmds)
Definition: scheduler.cpp:403
void NotifyHostTaskCompletion(Command *Cmd)
Definition: scheduler.cpp:443
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
Definition: scheduler.hpp:503
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
Definition: scheduler.cpp:262
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
Definition: scheduler.hpp:522
std::vector< Command * > MDeferredCleanupCommands
Definition: scheduler.hpp:950
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
Definition: scheduler.cpp:476
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
Definition: scheduler.cpp:380
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
Definition: scheduler.cpp:715
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Class representing the implementation of command_graph<executable>.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
bool CheckEventReadiness(const ContextImplPtr &Context, const EventImplPtr &SyclEventImplPtr)
Definition: scheduler.cpp:694
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
Definition: scheduler.hpp:191
static void registerAuxiliaryResourcesNoLock(std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void >>> &AuxiliaryResources, const EventImplPtr &Event, std::vector< std::shared_ptr< const void >> &&Resources)
Definition: scheduler.cpp:537
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
std::shared_ptr< event_impl > EventImplPtr
Definition: handler.hpp:184
CGType
Type of the command group.
Definition: cg_types.hpp:41
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:64
Definition: access.hpp:18
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
Result of command enqueueing.
Definition: commands.hpp:61
ResultT MResult
Indicates the result of enqueueing.
Definition: commands.hpp:72
Memory Object Record.
Definition: scheduler.hpp:202
std::vector< AllocaCommandBase * > MAllocaCommands
Definition: scheduler.hpp:208