DPC++ Runtime
Runtime libraries for oneAPI DPC++
scheduler.cpp
//===-- scheduler.cpp - SYCL Scheduler --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "detail/sycl_mem_obj_i.hpp"
#include <detail/global_handler.hpp>
#include <detail/graph_impl.hpp>
#include <sycl/feature_test.hpp>
#if SYCL_EXT_CODEPLAY_KERNEL_FUSION
#include <detail/jit_compiler.hpp>
#endif
#include <detail/queue_impl.hpp>
#include <detail/scheduler/scheduler.hpp>
#include <detail/stream_impl.hpp>
#include <sycl/device_selector.hpp>

#include <chrono>
#include <cstdio>
#include <memory>
#include <mutex>
#include <set>
#include <thread>
#include <vector>

namespace sycl {
inline namespace _V1 {
namespace detail {

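// Returns true if the memory object described by Record is safe to release:
// every read and write leaf is either an allocation command or has a
// completed event.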
bool Scheduler::checkLeavesCompletion(MemObjRecord *Record) {
  for (Command *Cmd : Record->MReadLeaves) {
    if (!(Cmd->getType() == detail::Command::ALLOCA ||
          Cmd->getType() == detail::Command::ALLOCA_SUB_BUF) &&
        !Cmd->getEvent()->isCompleted())
      return false;
  }
  for (Command *Cmd : Record->MWriteLeaves) {
    if (!(Cmd->getType() == detail::Command::ALLOCA ||
          Cmd->getType() == detail::Command::ALLOCA_SUB_BUF) &&
        !Cmd->getEvent()->isCompleted())
      return false;
  }
  return true;
}

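// Enqueues and waits for all read and write leaves of the record, then
// enqueues the release commands of its allocations and waits for them as
// well. GraphReadLock must be held by the caller.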
void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
                                      ReadLockT &GraphReadLock) {
#ifdef XPTI_ENABLE_INSTRUMENTATION
  // Will contain the list of dependencies for the Release Command
  std::set<Command *> DepCommands;
#endif
  std::vector<Command *> ToCleanUp;
  for (Command *Cmd : Record->MReadLeaves) {
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw exception(make_error_code(errc::runtime),
                      "Enqueue process failed.");
#ifdef XPTI_ENABLE_INSTRUMENTATION
    // Capture the dependencies
    DepCommands.insert(Cmd);
#endif
    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
  }
  for (Command *Cmd : Record->MWriteLeaves) {
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw exception(make_error_code(errc::runtime),
                      "Enqueue process failed.");
#ifdef XPTI_ENABLE_INSTRUMENTATION
    DepCommands.insert(Cmd);
#endif
    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
  }
  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
    Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
    EnqueueResultT Res;
    bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, GraphReadLock,
                                                   Res, ToCleanUp, ReleaseCmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw exception(make_error_code(errc::runtime),
                      "Enqueue process failed.");
#ifdef XPTI_ENABLE_INSTRUMENTATION
    // Report these dependencies to the Command so these dependencies can be
    // reported as edges
    ReleaseCmd->resolveReleaseDependencies(DepCommands);
#endif
    GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock,
                                 ToCleanUp);
  }
}

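// Builds the graph nodes for the command group while holding the write lock,
// then enqueues the resulting command (plus any auxiliary commands) after the
// lock has been released.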
EventImplPtr Scheduler::addCG(
    std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
    bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer,
    const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies) {
  EventImplPtr NewEvent = nullptr;
  const CGType Type = CommandGroup->getType();
  std::vector<Command *> AuxiliaryCmds;
  std::vector<std::shared_ptr<const void>> AuxiliaryResources;
  AuxiliaryResources = CommandGroup->getAuxiliaryResources();
  CommandGroup->clearAuxiliaryResources();

  bool ShouldEnqueue = true;
  {
    WriteLockT Lock = acquireWriteLock();

    Command *NewCmd = nullptr;
    switch (Type) {
    case CGType::UpdateHost:
      NewCmd =
          MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds);
      NewEvent = NewCmd->getEvent();
      break;
    case CGType::CodeplayHostTask: {
      auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr,
                                        AuxiliaryCmds, EventNeeded);
      NewCmd = Result.NewCmd;
      NewEvent = Result.NewEvent;
      ShouldEnqueue = Result.ShouldEnqueue;
      break;
    }
    default:
      auto Result = MGraphBuilder.addCG(
          std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded,
          CommandBuffer, std::move(Dependencies));

      NewCmd = Result.NewCmd;
      NewEvent = Result.NewEvent;
      ShouldEnqueue = Result.ShouldEnqueue;
    }
    NewEvent->setSubmissionTime();
  }

  if (ShouldEnqueue) {
    enqueueCommandForCG(NewEvent, AuxiliaryCmds);
  }

  if (!AuxiliaryResources.empty())
    registerAuxiliaryResources(NewEvent, std::move(AuxiliaryResources));

  return NewEvent;
}

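// Enqueues the command associated with NewEvent together with its auxiliary
// commands. If enqueueing fails and the new command is not referenced by any
// other node, it is deleted to avoid a leak.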
void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent,
                                    std::vector<Command *> &AuxiliaryCmds,
                                    BlockingT Blocking) {
  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();

    Command *NewCmd =
        (NewEvent) ? static_cast<Command *>(NewEvent->getCommand()) : nullptr;

    EnqueueResultT Res;
    bool Enqueued;

    auto CleanUp = [&]() {
      if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
        if (NewEvent) {
          NewEvent->setCommand(nullptr);
        }
        delete NewCmd;
      }
    };

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd,
                                                Blocking);
      try {
        if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
          throw exception(make_error_code(errc::runtime),
                          "Auxiliary enqueue process failed.");
      } catch (...) {
        // enqueueCommand() and the check above may throw an exception, so
        // destroy the resources we own to avoid a memory leak.
        CleanUp();
        std::rethrow_exception(std::current_exception());
      }
    }

    if (NewCmd) {
      // TODO: Check if lazy mode.
      EnqueueResultT Res;
      try {
        bool Enqueued = GraphProcessor::enqueueCommand(
            NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking);
        if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
          throw exception(make_error_code(errc::runtime),
                          "Enqueue process failed.");
      } catch (...) {
        // enqueueCommand() and the check above may throw an exception, so
        // destroy the resources we own to avoid a memory leak.
        CleanUp();
        std::rethrow_exception(std::current_exception());
      }
    }
  }
  cleanupCommands(ToCleanUp);
}

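// Enqueue failures here are reported to the worker queue as asynchronous
// exceptions rather than thrown, as copy-back typically runs during buffer
// destruction where throwing is not an option.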
EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
  std::vector<Command *> AuxiliaryCmds;
  Command *NewCmd = nullptr;
  {
    WriteLockT Lock = acquireWriteLock();
    NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
    // Command was not created because there were no operations with the
    // buffer.
    if (!NewCmd)
      return nullptr;
  }

  std::vector<Command *> ToCleanUp;
  try {
    ReadLockT Lock = acquireReadLock();
    EnqueueResultT Res;
    bool Enqueued;

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }

    Enqueued =
        GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw exception(make_error_code(errc::runtime),
                      "Enqueue process failed.");
  } catch (...) {
    auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue();
    assert(WorkerQueue && "WorkerQueue for CopyBack command must not be null");
    WorkerQueue->reportAsyncException(std::current_exception());
  }
  EventImplPtr NewEvent = NewCmd->getEvent();
  cleanupCommands(ToCleanUp);
  return NewEvent;
}

Scheduler &Scheduler::getInstance() {
  return GlobalHandler::instance().getScheduler();
}

bool Scheduler::isInstanceAlive() {
  return GlobalHandler::instance().isSchedulerAlive();
}

void Scheduler::waitForEvent(const EventImplPtr &Event, bool *Success) {
  ReadLockT Lock = acquireReadLock();
  // It's fine to leave the lock unlocked upon return from waitForEvent as
  // there are no more actions to do here with the graph.
  std::vector<Command *> ToCleanUp;
  GraphProcessor::waitForEvent(std::move(Event), Lock, ToCleanUp,
                               /*LockTheLock=*/false, Success);
  cleanupCommands(ToCleanUp);
}

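// With StrictLock == false both locks are only tried, so the function may
// decline the removal by returning false; the caller is expected to retry
// later.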
bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj,
                                   bool StrictLock) {
  MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
  if (!Record)
    // No operations were performed on the mem object
    return true;

  {
    // This only needs a shared mutex as it only involves enqueueing and
    // waiting for events
    ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock)
                                : ReadLockT(MGraphLock, std::try_to_lock);
    if (!Lock.owns_lock())
      return false;
    waitForRecordToFinish(Record, Lock);
  }
  {
    WriteLockT Lock = StrictLock ? acquireWriteLock()
                                 : WriteLockT(MGraphLock, std::try_to_lock);
    if (!Lock.owns_lock())
      return false;
    MGraphBuilder.decrementLeafCountersForRecord(Record);
    MGraphBuilder.cleanupCommandsForRecord(Record);
    MGraphBuilder.removeRecordForMemObj(MemObj);
  }
  return true;
}

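// Creates the host accessor node under the write lock, then enqueues it and
// its auxiliary commands under the read lock.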
EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
  std::vector<Command *> AuxiliaryCmds;
  EventImplPtr NewCmdEvent = nullptr;

  {
    WriteLockT Lock = acquireWriteLock();

    Command *NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
    if (!NewCmd)
      return nullptr;
    NewCmdEvent = NewCmd->getEvent();
  }

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();
    EnqueueResultT Res;
    bool Enqueued;

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }

    if (Command *NewCmd = static_cast<Command *>(NewCmdEvent->getCommand())) {
      Enqueued =
          GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }
  }

  cleanupCommands(ToCleanUp);
  return NewCmdEvent;
}

void Scheduler::releaseHostAccessor(Requirement *Req) {
  Command *const BlockedCmd = Req->MBlockedCmd;

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();

    assert(BlockedCmd && "Can't find appropriate command to unblock");

    BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;

    enqueueLeavesOfReqUnlocked(Req, Lock, ToCleanUp);
  }
  cleanupCommands(ToCleanUp);
}

void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req,
                                           ReadLockT &GraphReadLock,
                                           std::vector<Command *> &ToCleanUp) {
  MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get();
  auto EnqueueLeaves = [&ToCleanUp, &GraphReadLock](LeavesCollection &Leaves) {
    for (Command *Cmd : Leaves) {
      EnqueueResultT Res;
      bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res,
                                                     ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }
  };

  EnqueueLeaves(Record->MReadLeaves);
  EnqueueLeaves(Record->MWriteLeaves);
}

void Scheduler::enqueueUnblockedCommands(
    const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
    std::vector<Command *> &ToCleanUp) {
  for (auto &Event : ToEnqueue) {
    Command *Cmd = static_cast<Command *>(Event->getCommand());
    if (!Cmd)
      continue;
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw exception(make_error_code(errc::runtime),
                      "Enqueue process failed.");
  }
}

void Scheduler::releaseResources(BlockingT Blocking) {
  // There might be some commands scheduled for post-enqueue cleanup that
  // haven't been freed because the graph mutex was locked at the time;
  // clean them up now.
  cleanupCommands({});

  cleanupAuxiliaryResources(Blocking);
  // We need a loop here because new objects may be added to the deferred mem
  // objects storage during cleanup. A known example: we clean up existing
  // deferred mem objects under the write lock; during this process we clean
  // up commands related to the record, a command may hold the last reference
  // to a queue_impl, ~queue_impl is called, and the buffer used for assert
  // support (which is created with a size only, so all conditions for
  // deferred release are satisfied) is added to the deferred mem obj storage.
  // Without the loop we could end up with a leak.
  do {
    cleanupDeferredMemObjects(Blocking);
  } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty());
}

MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) {
  return Req->MSYCLMemObj->MRecord.get();
}

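// Cleans up the passed commands along with any previously deferred ones. If
// the graph lock cannot be taken right away, cleanup is deferred instead of
// blocking, to avoid deadlocks with blocked commands.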
void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
  cleanupAuxiliaryResources(BlockingT::NON_BLOCKING);
  cleanupDeferredMemObjects(BlockingT::NON_BLOCKING);

  if (Cmds.empty()) {
    std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
    if (MDeferredCleanupCommands.empty())
      return;
  }

  WriteLockT Lock(MGraphLock, std::try_to_lock);
  // In order to avoid deadlocks related to blocked commands, defer cleanup if
  // the lock wasn't acquired.
  if (Lock.owns_lock()) {
    for (Command *Cmd : Cmds) {
      MGraphBuilder.cleanupCommand(Cmd);
    }
    std::vector<Command *> DeferredCleanupCommands;
    {
      std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
      std::swap(DeferredCleanupCommands, MDeferredCleanupCommands);
    }
    for (Command *Cmd : DeferredCleanupCommands) {
      MGraphBuilder.cleanupCommand(Cmd, /*AllowUnsubmitted=*/true);
    }

  } else {
    std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
    // Full cleanup for fusion placeholder commands is handled by the entry
    // points for fusion (start_fusion, ...). To avoid double free or access to
    // objects after their lifetime, fusion commands should therefore never be
    // added to the deferred command list.
    std::copy_if(Cmds.begin(), Cmds.end(),
                 std::back_inserter(MDeferredCleanupCommands),
                 [](const Command *Cmd) {
                   return Cmd->getType() != Command::CommandType::FUSION;
                 });
  }
}

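// Called when a host task finishes: marks the command for cleanup if it is no
// longer a leaf, completes its event, and enqueues the commands blocked on it.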
void Scheduler::NotifyHostTaskCompletion(Command *Cmd) {
  // Completing the command's event along with unblocking enqueue readiness of
  // the empty command may lead to quick deallocation of MThisCmd by some
  // cleanup process. Thus we copy the deps before completing the event and
  // unblocking the empty command.
  // It's also possible for the record to be deallocated before the enqueue
  // process, so we take the read lock on the graph.

  std::vector<Command *> ToCleanUp;
  auto CmdEvent = Cmd->getEvent();
  auto QueueImpl = CmdEvent->getSubmittedQueue();
  assert(QueueImpl && "Submitted queue for host task must not be null");
  {
    ReadLockT Lock = acquireReadLock();

    std::vector<DepDesc> Deps = Cmd->MDeps;
    // Host tasks are cleaned up upon completion rather than enqueuing.
    if (Cmd->MLeafCounter == 0) {
      ToCleanUp.push_back(Cmd);
      Cmd->MMarkedForCleanup = true;
    }
    {
      std::lock_guard<std::mutex> Guard(Cmd->MBlockedUsersMutex);
      // Update the status of the self-event
      CmdEvent->setComplete();
    }
    Scheduler::enqueueUnblockedCommands(Cmd->MBlockedUsers, Lock, ToCleanUp);
  }
  QueueImpl->revisitUnenqueuedCommandsState(CmdEvent);

  cleanupCommands(ToCleanUp);
}

void Scheduler::deferMemObjRelease(const std::shared_ptr<SYCLMemObjI> &MemObj) {
  {
    std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
    MDeferredMemObjRelease.push_back(MemObj);
  }
  cleanupDeferredMemObjects(BlockingT::NON_BLOCKING);
}

bool Scheduler::isDeferredMemObjectsEmpty() {
  std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
  return MDeferredMemObjRelease.empty();
}

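// Releases deferred memory objects whose graph leaves have completed. In
// blocking mode the whole storage is drained first; otherwise objects whose
// leaves are still executing stay deferred.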
void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) {
  if (isDeferredMemObjectsEmpty())
    return;
  if (Blocking == BlockingT::BLOCKING) {
    std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage;
    {
      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
      MDeferredMemObjRelease.swap(TempStorage);
    }
    // If any objects remain in TempStorage, they leave scope here and are
    // deleted.
  }

  std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
  {
    // The lock is needed for checkLeavesCompletion - it walks through the
    // record leaves.
    ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock);
    if (Lock.owns_lock()) {
      // Blocking == true is not expected to be used in parallel with adding
      // MemObj to the storage; there is no such scenario.
      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
      auto MemObjIt = MDeferredMemObjRelease.begin();
      while (MemObjIt != MDeferredMemObjRelease.end()) {
        MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get());
        if (!checkLeavesCompletion(Record)) {
          MemObjIt++;
          continue;
        }
        ObjsReadyToRelease.push_back(*MemObjIt);
        MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
      }
    }
  }
  auto ReleaseCandidateIt = ObjsReadyToRelease.begin();
  while (ReleaseCandidateIt != ObjsReadyToRelease.end()) {
    if (!removeMemoryObject(ReleaseCandidateIt->get(), false))
      break;
    ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
  }
  if (!ObjsReadyToRelease.empty()) {
    std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
    MDeferredMemObjRelease.insert(
        MDeferredMemObjRelease.end(),
        std::make_move_iterator(ObjsReadyToRelease.begin()),
        std::make_move_iterator(ObjsReadyToRelease.end()));
  }
}

static void registerAuxiliaryResourcesNoLock(
    std::unordered_map<EventImplPtr, std::vector<std::shared_ptr<const void>>>
        &AuxiliaryResources,
    const EventImplPtr &Event,
    std::vector<std::shared_ptr<const void>> &&Resources) {
  std::vector<std::shared_ptr<const void>> &StoredResources =
      AuxiliaryResources[Event];
  StoredResources.insert(StoredResources.end(),
                         std::make_move_iterator(Resources.begin()),
                         std::make_move_iterator(Resources.end()));
}

void Scheduler::takeAuxiliaryResources(const EventImplPtr &Dst,
                                       const EventImplPtr &Src) {
  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
  auto Iter = MAuxiliaryResources.find(Src);
  if (Iter == MAuxiliaryResources.end()) {
    return;
  }
  registerAuxiliaryResourcesNoLock(MAuxiliaryResources, Dst,
                                   std::move(Iter->second));
  MAuxiliaryResources.erase(Iter);
}

void Scheduler::registerAuxiliaryResources(
    EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources) {
  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
  registerAuxiliaryResourcesNoLock(MAuxiliaryResources, Event,
                                   std::move(Resources));
}

void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) {
  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
  for (auto It = MAuxiliaryResources.begin();
       It != MAuxiliaryResources.end();) {
    const EventImplPtr &Event = It->first;
    if (Blocking == BlockingT::BLOCKING) {
      Event->waitInternal();
      It = MAuxiliaryResources.erase(It);
    } else if (Event->isCompleted())
      It = MAuxiliaryResources.erase(It);
    else
      ++It;
  }
}

void Scheduler::startFusion(QueueImplPtr Queue) {
  WriteLockT Lock = acquireWriteLock();
  WriteLockT FusionMapLock = acquireFusionWriteLock();
  MGraphBuilder.startFusion(Queue);
}

void Scheduler::cleanUpCmdFusion(sycl::detail::queue_impl *Queue) {
  // No graph lock, we might be called because the graph builder is releasing
  // resources.
  WriteLockT FusionMapLock = acquireFusionWriteLock();
  MGraphBuilder.cleanUpCmdFusion(Queue);
}

void Scheduler::cancelFusion(QueueImplPtr Queue) {
  std::vector<Command *> ToEnqueue;
  {
    WriteLockT Lock = acquireWriteLock();
    WriteLockT FusionMapLock = acquireFusionWriteLock();
    MGraphBuilder.cancelFusion(Queue, ToEnqueue);
  }
  enqueueCommandForCG(nullptr, ToEnqueue);
}

sycl::detail::pi::PiKernel Scheduler::completeSpecConstMaterialization(
    [[maybe_unused]] QueueImplPtr Queue,
    [[maybe_unused]] const RTDeviceBinaryImage *BinImage,
    [[maybe_unused]] const std::string &KernelName,
    [[maybe_unused]] std::vector<unsigned char> &SpecConstBlob) {
#if SYCL_EXT_CODEPLAY_KERNEL_FUSION
  return detail::jit_compiler::get_instance().materializeSpecConstants(
      Queue, BinImage, KernelName, SpecConstBlob);
#else // SYCL_EXT_CODEPLAY_KERNEL_FUSION
  printFusionWarning(
      "Materialization of spec constants not supported by this build");
  return nullptr;
#endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION
}

EventImplPtr Scheduler::completeFusion(QueueImplPtr Queue,
                                       const property_list &PropList) {
  std::vector<Command *> ToEnqueue;
  EventImplPtr FusedEvent;
  {
    WriteLockT Lock = acquireWriteLock();
    WriteLockT FusionMapLock = acquireFusionWriteLock();
    FusedEvent = MGraphBuilder.completeFusion(Queue, ToEnqueue, PropList);
  }
  enqueueCommandForCG(nullptr, ToEnqueue);

  return FusedEvent;
}

bool Scheduler::isInFusionMode(QueueIdT Queue) {
  ReadLockT Lock = acquireFusionReadLock();
  return MGraphBuilder.isInFusionMode(Queue);
}

void Scheduler::printFusionWarning(const std::string &Message) {
  if (SYCLConfig<SYCL_RT_WARNING_LEVEL>::get() > 0) {
    std::cerr << "WARNING: " << Message << "\n";
  }
}

KernelFusionCommand *Scheduler::isPartOfActiveFusion(Command *Cmd) {
  auto CmdType = Cmd->getType();
  switch (CmdType) {
  case Command::FUSION: {
    auto *FusionCmd = static_cast<KernelFusionCommand *>(Cmd);
    return (FusionCmd->isActive()) ? FusionCmd : nullptr;
  }
  case Command::RUN_CG: {
    auto *CGCmd = static_cast<ExecCGCommand *>(Cmd);
    return (CGCmd->MFusionCmd && CGCmd->MFusionCmd->isActive())
               ? CGCmd->MFusionCmd
               : nullptr;
  }
  default:
    return nullptr;
  }
}

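// Mirrors addHostAccessor: the graph update command is created under the
// write lock and then enqueued together with its auxiliary commands under
// the read lock.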
EventImplPtr Scheduler::addCommandGraphUpdate(
    ext::oneapi::experimental::detail::exec_graph_impl *Graph,
    std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
        Nodes,
    const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
    std::vector<detail::EventImplPtr> &Events) {
  std::vector<Command *> AuxiliaryCmds;
  EventImplPtr NewCmdEvent = nullptr;

  {
    WriteLockT Lock = acquireWriteLock();

    Command *NewCmd = MGraphBuilder.addCommandGraphUpdate(
        Graph, Nodes, Queue, Requirements, Events, AuxiliaryCmds);
    if (!NewCmd)
      return nullptr;
    NewCmdEvent = NewCmd->getEvent();
  }

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();
    EnqueueResultT Res;
    bool Enqueued;

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }

    if (Command *NewCmd = static_cast<Command *>(NewCmdEvent->getCommand())) {
      Enqueued =
          GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw exception(make_error_code(errc::runtime),
                        "Enqueue process failed.");
    }
  }

  cleanupCommands(ToCleanUp);
  return NewCmdEvent;
}

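// Returns whether a dependency event can be handed to the backend directly,
// allowing the scheduler to be bypassed for the dependent submission.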
bool CheckEventReadiness(const ContextImplPtr &Context,
                         const EventImplPtr &SyclEventImplPtr) {
  // Events that don't have an initialized context are throwaway events that
  // don't represent actual dependencies. Calling getContextImpl() would set
  // their context, which we wish to avoid as it is expensive.
  // NOP events also don't represent actual dependencies.
  if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
    return true;
  }
  if (SyclEventImplPtr->isHost()) {
    return SyclEventImplPtr->isCompleted();
  }
  // Cross-context dependencies can't be passed to the backend directly.
  if (SyclEventImplPtr->getContextImpl() != Context)
    return false;

  // A nullptr here means that the command does not produce a PI event or it
  // hasn't been enqueued yet.
  return SyclEventImplPtr->getHandleRef() != nullptr;
}

bool Scheduler::areEventsSafeForSchedulerBypass(
    const std::vector<sycl::event> &DepEvents, ContextImplPtr Context) {

  return std::all_of(
      DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) {
        const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
        return CheckEventReadiness(Context, SyclEventImplPtr);
      });
}

bool Scheduler::areEventsSafeForSchedulerBypass(
    const std::vector<EventImplPtr> &DepEvents, ContextImplPtr Context) {

  return std::all_of(DepEvents.begin(), DepEvents.end(),
                     [&Context](const EventImplPtr &SyclEventImplPtr) {
                       return CheckEventReadiness(Context, SyclEventImplPtr);
                     });
}

} // namespace detail
} // namespace _V1
} // namespace sycl