DPC++ Runtime
Runtime libraries for oneAPI DPC++
scheduler.cpp
//===-- scheduler.cpp - SYCL Scheduler --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "detail/sycl_mem_obj_i.hpp"
#include <detail/global_handler.hpp>
#include <detail/queue_impl.hpp>
#include <detail/scheduler/scheduler.hpp>
#include <detail/stream_impl.hpp>
#include <sycl/device_selector.hpp>

#include <chrono>
#include <cstdio>
#include <memory>
#include <mutex>
#include <set>
#include <thread>
#include <vector>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {

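// Returns true only if every command in the record's read and write leaves has
// an already-completed event.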
bool Scheduler::checkLeavesCompletion(MemObjRecord *Record) {
  for (Command *Cmd : Record->MReadLeaves) {
    if (!Cmd->getEvent()->isCompleted())
      return false;
  }
  for (Command *Cmd : Record->MWriteLeaves) {
    if (!Cmd->getEvent()->isCompleted())
      return false;
  }
  return true;
}

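// Enqueues (if needed) and waits for every read and write leaf command of the
// record, then enqueues and waits for the release command of each allocation.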
void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
                                      ReadLockT &GraphReadLock) {
#ifdef XPTI_ENABLE_INSTRUMENTATION
  // Will contain the list of dependencies for the Release Command
  std::set<Command *> DepCommands;
#endif
  std::vector<Command *> ToCleanUp;
  for (Command *Cmd : Record->MReadLeaves) {
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw runtime_error("Enqueue process failed.",
                          PI_ERROR_INVALID_OPERATION);
#ifdef XPTI_ENABLE_INSTRUMENTATION
    // Capture the dependencies
    DepCommands.insert(Cmd);
#endif
    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
  }
  for (Command *Cmd : Record->MWriteLeaves) {
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw runtime_error("Enqueue process failed.",
                          PI_ERROR_INVALID_OPERATION);
#ifdef XPTI_ENABLE_INSTRUMENTATION
    DepCommands.insert(Cmd);
#endif
    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
  }
  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
    Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
    EnqueueResultT Res;
    bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, GraphReadLock,
                                                   Res, ToCleanUp, ReleaseCmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw runtime_error("Enqueue process failed.",
                          PI_ERROR_INVALID_OPERATION);
#ifdef XPTI_ENABLE_INSTRUMENTATION
    // Report these dependencies to the Command so these dependencies can be
    // reported as edges
    ReleaseCmd->resolveReleaseDependencies(DepCommands);
#endif
    GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock,
                                 ToCleanUp);
  }
}

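// Adds the command group to the execution graph, enqueues the resulting
// commands unless the graph builder asks for enqueueing to be deferred, and
// returns the event associated with the new command.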
EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
                              const QueueImplPtr &Queue) {
  EventImplPtr NewEvent = nullptr;
  const CG::CGTYPE Type = CommandGroup->getType();
  std::vector<Command *> AuxiliaryCmds;
  std::vector<StreamImplPtr> Streams;
  std::vector<std::shared_ptr<const void>> AuxiliaryResources;

  if (Type == CG::Kernel) {
    auto *CGExecKernelPtr = static_cast<CGExecKernel *>(CommandGroup.get());
    Streams = CGExecKernelPtr->getStreams();
    CGExecKernelPtr->clearStreams();
    AuxiliaryResources = CGExecKernelPtr->getAuxiliaryResources();
    CGExecKernelPtr->clearAuxiliaryResources();
    // A stream's flush buffer is normally initialized in the stream's __init
    // method. However, that method is not available on the host device, so
    // initialize the stream's flush buffer on the host side in a separate
    // task.
    if (Queue->is_host()) {
      for (const StreamImplPtr &Stream : Streams) {
        Stream->initStreamHost(Queue);
      }
    }
  }

  bool ShouldEnqueue = true;
  {
    WriteLockT Lock = acquireWriteLock();

    Command *NewCmd = nullptr;
    switch (Type) {
    case CG::UpdateHost:
      NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup),
                                             DefaultHostQueue, AuxiliaryCmds);
      NewEvent = NewCmd->getEvent();
      break;
    case CG::CodeplayHostTask: {
      auto Result = MGraphBuilder.addCG(std::move(CommandGroup),
                                        DefaultHostQueue, AuxiliaryCmds);
      NewCmd = Result.NewCmd;
      NewEvent = Result.NewEvent;
      ShouldEnqueue = Result.ShouldEnqueue;
      break;
    }
    default:
      auto Result = MGraphBuilder.addCG(std::move(CommandGroup),
                                        std::move(Queue), AuxiliaryCmds);
      NewCmd = Result.NewCmd;
      NewEvent = Result.NewEvent;
      ShouldEnqueue = Result.ShouldEnqueue;
    }
    NewEvent->setSubmissionTime();
  }

  if (ShouldEnqueue) {
    enqueueCommandForCG(NewEvent, AuxiliaryCmds);

    for (auto StreamImplPtr : Streams) {
      StreamImplPtr->flush(NewEvent);
    }

    if (!AuxiliaryResources.empty())
      registerAuxiliaryResources(NewEvent, std::move(AuxiliaryResources));
  }

  return NewEvent;
}

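// Enqueues the auxiliary commands and the command attached to NewEvent (if
// any); an orphaned command with no dependencies or users is deleted if
// enqueueing throws.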
void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent,
                                    std::vector<Command *> &AuxiliaryCmds,
                                    BlockingT Blocking) {
  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();

    Command *NewCmd =
        (NewEvent) ? static_cast<Command *>(NewEvent->getCommand()) : nullptr;

    EnqueueResultT Res;
    bool Enqueued;

    auto CleanUp = [&]() {
      if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
        if (NewEvent) {
          NewEvent->setCommand(nullptr);
        }
        delete NewCmd;
      }
    };

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd,
                                                Blocking);
      try {
        if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
          throw runtime_error("Auxiliary enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
      } catch (...) {
        // enqueueCommand() and the check above may throw, so release the
        // allocated resources to avoid a memory leak.
        CleanUp();
        std::rethrow_exception(std::current_exception());
      }
    }

    if (NewCmd) {
      // TODO: Check if lazy mode.
      EnqueueResultT Res;
      try {
        bool Enqueued = GraphProcessor::enqueueCommand(
            NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking);
        if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
          throw runtime_error("Enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
      } catch (...) {
        // enqueueCommand() and the check above may throw, so release the
        // allocated resources to avoid a memory leak.
        CleanUp();
        std::rethrow_exception(std::current_exception());
      }
    }
  }
  cleanupCommands(ToCleanUp);
}

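// Adds a copy-back operation for the memory object described by Req and
// enqueues it; returns nullptr if no command is needed because the buffer was
// never used.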
EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
  std::vector<Command *> AuxiliaryCmds;
  Command *NewCmd = nullptr;
  {
    WriteLockT Lock = acquireWriteLock();
    NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
    // Command was not created because there were no operations with the
    // buffer.
    if (!NewCmd)
      return nullptr;
  }

  std::vector<Command *> ToCleanUp;
  try {
    ReadLockT Lock = acquireReadLock();
    EnqueueResultT Res;
    bool Enqueued;

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw runtime_error("Enqueue process failed.",
                            PI_ERROR_INVALID_OPERATION);
    }

    Enqueued =
        GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw runtime_error("Enqueue process failed.",
                          PI_ERROR_INVALID_OPERATION);
  } catch (...) {
    NewCmd->getQueue()->reportAsyncException(std::current_exception());
  }
  EventImplPtr NewEvent = NewCmd->getEvent();
  cleanupCommands(ToCleanUp);
  return NewEvent;
}

Scheduler &Scheduler::getInstance() {
  return GlobalHandler::instance().getScheduler();
}

void Scheduler::waitForEvent(const EventImplPtr &Event) {
  ReadLockT Lock = acquireReadLock();
  // It's fine to leave the lock unlocked upon return from waitForEvent as
  // there are no more actions to perform on the graph here.
  std::vector<Command *> ToCleanUp;
  GraphProcessor::waitForEvent(std::move(Event), Lock, ToCleanUp,
                               /*LockTheLock=*/false);
  cleanupCommands(ToCleanUp);
}

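// Waits for every command recorded for the memory object to finish and then
// removes its record from the graph. With StrictLock == false the graph locks
// are only try-locked; returns false if they could not be acquired.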
bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj,
                                   bool StrictLock) {
  MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
  if (!Record)
    // No operations were performed on the mem object
    return true;

  {
    // This only needs a shared mutex as it only involves enqueueing and
    // waiting for events
    ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock)
                                : ReadLockT(MGraphLock, std::try_to_lock);
    if (!Lock.owns_lock())
      return false;
    waitForRecordToFinish(Record, Lock);
  }
  {
    WriteLockT Lock = StrictLock ? acquireWriteLock()
                                 : WriteLockT(MGraphLock, std::try_to_lock);
    if (!Lock.owns_lock())
      return false;
    MGraphBuilder.decrementLeafCountersForRecord(Record);
    MGraphBuilder.cleanupCommandsForRecord(Record);
    MGraphBuilder.removeRecordForMemObj(MemObj);
  }
  return true;
}

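// Builds and enqueues the commands needed to make the memory described by Req
// available to a host accessor; returns nullptr if the graph builder did not
// create a command, otherwise the event to wait on.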
EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
  std::vector<Command *> AuxiliaryCmds;
  EventImplPtr NewCmdEvent = nullptr;

  {
    WriteLockT Lock = acquireWriteLock();

    Command *NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
    if (!NewCmd)
      return nullptr;
    NewCmdEvent = NewCmd->getEvent();
  }

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();
    EnqueueResultT Res;
    bool Enqueued;

    for (Command *Cmd : AuxiliaryCmds) {
      Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw runtime_error("Enqueue process failed.",
                            PI_ERROR_INVALID_OPERATION);
    }

    if (Command *NewCmd = static_cast<Command *>(NewCmdEvent->getCommand())) {
      Enqueued =
          GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw runtime_error("Enqueue process failed.",
                            PI_ERROR_INVALID_OPERATION);
    }
  }

  cleanupCommands(ToCleanUp);
  return NewCmdEvent;
}

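// Unblocks the command that was blocked by the host accessor and re-enqueues
// the leaves of the requirement so that dependent commands can make progress.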
void Scheduler::releaseHostAccessor(Requirement *Req) {
  Command *const BlockedCmd = Req->MBlockedCmd;

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();

    assert(BlockedCmd && "Can't find appropriate command to unblock");

    BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;

    enqueueLeavesOfReqUnlocked(Req, Lock, ToCleanUp);
  }
  cleanupCommands(ToCleanUp);
}

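// Enqueues all read and write leaf commands of the requirement's memory object
// record; the caller must hold the graph read lock.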
void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req,
                                           ReadLockT &GraphReadLock,
                                           std::vector<Command *> &ToCleanUp) {
  MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get();
  auto EnqueueLeaves = [&ToCleanUp, &GraphReadLock](LeavesCollection &Leaves) {
    for (Command *Cmd : Leaves) {
      EnqueueResultT Res;
      bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res,
                                                     ToCleanUp, Cmd);
      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
        throw runtime_error("Enqueue process failed.",
                            PI_ERROR_INVALID_OPERATION);
    }
  };

  EnqueueLeaves(Record->MReadLeaves);
  EnqueueLeaves(Record->MWriteLeaves);
}

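// Enqueues the commands associated with the given events, skipping events that
// no longer have a command attached.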
void Scheduler::enqueueUnblockedCommands(
    const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
    std::vector<Command *> &ToCleanUp) {
  for (auto &Event : ToEnqueue) {
    Command *Cmd = static_cast<Command *>(Event->getCommand());
    if (!Cmd)
      continue;
    EnqueueResultT Res;
    bool Enqueued =
        GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
      throw runtime_error("Enqueue process failed.",
                          PI_ERROR_INVALID_OPERATION);
  }
}

Scheduler::Scheduler() {
  sycl::device HostDevice =
      createSyclObjFromImpl<device>(device_impl::getHostDeviceImpl());
  sycl::context HostContext{HostDevice};
  DefaultHostQueue = QueueImplPtr(
      new queue_impl(detail::getSyclObjImpl(HostDevice),
                     detail::getSyclObjImpl(HostContext), /*AsyncHandler=*/{},
                     /*PropList=*/{}));
}

Scheduler::~Scheduler() { DefaultHostQueue.reset(); }

void Scheduler::releaseResources() {
  // There might be some commands scheduled for post-enqueue cleanup that
  // haven't been freed because the graph mutex was locked at the time; clean
  // them up now.
  cleanupCommands({});

  cleanupAuxiliaryResources(BlockingT::BLOCKING);
  // A loop is needed because new objects may be added to the deferred memory
  // object storage during cleanup. A known example: existing deferred memory
  // objects are cleaned up under the write lock, and during this process the
  // commands related to a record are cleaned up as well; a command may hold
  // the last reference to a queue_impl, so ~queue_impl is called and the
  // buffer used for assert support (which is created with a size only, so all
  // conditions for deferred release are satisfied) is added to the deferred
  // memory object storage. Without the loop this would leak.
  while (!isDeferredMemObjectsEmpty())
    cleanupDeferredMemObjects(BlockingT::BLOCKING);
}

MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) {
  return Req->MSYCLMemObj->MRecord.get();
}

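// Frees the given finished commands together with any previously deferred
// ones; if the graph write lock cannot be taken, the commands are added to the
// deferred cleanup list instead.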
void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
  cleanupAuxiliaryResources(BlockingT::NON_BLOCKING);
  cleanupDeferredMemObjects(BlockingT::NON_BLOCKING);

  if (Cmds.empty()) {
    std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
    if (MDeferredCleanupCommands.empty())
      return;
  }

  WriteLockT Lock(MGraphLock, std::try_to_lock);
  // In order to avoid deadlocks related to blocked commands, defer cleanup if
  // the lock wasn't acquired.
  if (Lock.owns_lock()) {
    for (Command *Cmd : Cmds) {
      MGraphBuilder.cleanupCommand(Cmd);
    }
    std::vector<Command *> DeferredCleanupCommands;
    {
      std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
      std::swap(DeferredCleanupCommands, MDeferredCleanupCommands);
    }
    for (Command *Cmd : DeferredCleanupCommands) {
      MGraphBuilder.cleanupCommand(Cmd);
    }
  } else {
    std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
    MDeferredCleanupCommands.insert(MDeferredCleanupCommands.end(),
                                    Cmds.begin(), Cmds.end());
  }
}

void Scheduler::NotifyHostTaskCompletion(Command *Cmd) {
  // Completing the command's event along with unblocking the enqueue readiness
  // of the empty command may lead to quick deallocation of MThisCmd by some
  // cleanup process. Thus the dependencies are copied prior to completing the
  // event and unblocking the empty command.
  // It is also possible for the record to be deallocated before the enqueue
  // process, which is why a read lock on the graph is taken.

  std::vector<Command *> ToCleanUp;
  {
    ReadLockT Lock = acquireReadLock();

    std::vector<DepDesc> Deps = Cmd->MDeps;
    // Host tasks are cleaned up upon completion rather than upon enqueueing.
    if (Cmd->MLeafCounter == 0) {
      ToCleanUp.push_back(Cmd);
      Cmd->MMarkedForCleanup = true;
    }

    {
      std::lock_guard<std::mutex> Guard(Cmd->MBlockedUsersMutex);
      // Update the status of the command's own event.
      Cmd->getEvent()->setComplete();
    }
    Scheduler::enqueueUnblockedCommands(Cmd->MBlockedUsers, Lock, ToCleanUp);
  }
  cleanupCommands(ToCleanUp);
}

void Scheduler::deferMemObjRelease(const std::shared_ptr<SYCLMemObjI> &MemObj) {
  {
    std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
    MDeferredMemObjRelease.push_back(MemObj);
  }
  cleanupDeferredMemObjects(BlockingT::NON_BLOCKING);
}

inline bool Scheduler::isDeferredMemObjectsEmpty() {
  std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
  return MDeferredMemObjRelease.empty();
}

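// Releases deferred memory objects whose leaf commands have completed. In
// BLOCKING mode the currently deferred objects are released immediately by
// dropping their references.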
void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) {
  if (isDeferredMemObjectsEmpty())
    return;
  if (Blocking == BlockingT::BLOCKING) {
    std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage;
    {
      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
      MDeferredMemObjRelease.swap(TempStorage);
    }
    // Any objects moved to TempStorage are deleted as it goes out of scope.
  }

  std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
  {
    // The lock is needed for checkLeavesCompletion - it walks through the
    // record's leaves.
    ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock);
    if (Lock.owns_lock()) {
      // Blocking == true is not expected to be used in parallel with adding a
      // MemObj to the storage; there is no such scenario.
      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
      auto MemObjIt = MDeferredMemObjRelease.begin();
      while (MemObjIt != MDeferredMemObjRelease.end()) {
        MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get());
        if (!checkLeavesCompletion(Record)) {
          MemObjIt++;
          continue;
        }
        ObjsReadyToRelease.push_back(*MemObjIt);
        MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
      }
    }
  }
  auto ReleaseCandidateIt = ObjsReadyToRelease.begin();
  while (ReleaseCandidateIt != ObjsReadyToRelease.end()) {
    if (!removeMemoryObject(ReleaseCandidateIt->get(), false))
      break;
    ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
  }
  if (!ObjsReadyToRelease.empty()) {
    std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
    MDeferredMemObjRelease.insert(
        MDeferredMemObjRelease.end(),
        std::make_move_iterator(ObjsReadyToRelease.begin()),
        std::make_move_iterator(ObjsReadyToRelease.end()));
  }
}

void Scheduler::registerAuxiliaryResources(
    EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources) {
  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
  MAuxiliaryResources.insert({Event, std::move(Resources)});
}

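// Releases auxiliary resources kept alive for events that have completed; in
// BLOCKING mode, waits for each event before releasing its resources.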
void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) {
  std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
  ForceDeferredReleaseWrapper ForceDeferredRelease;
  for (auto It = MAuxiliaryResources.begin();
       It != MAuxiliaryResources.end();) {
    const EventImplPtr &Event = It->first;
    if (Blocking == BlockingT::BLOCKING) {
      Event->waitInternal();
      It = MAuxiliaryResources.erase(It);
    } else if (Event->isCompleted())
      It = MAuxiliaryResources.erase(It);
    else
      ++It;
  }
}

thread_local bool Scheduler::ForceDeferredMemObjRelease = false;

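// Kernel fusion entry points: delegate to the graph builder under the
// appropriate graph lock and enqueue any commands produced when a fusion is
// cancelled or completed.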
void Scheduler::startFusion(QueueImplPtr Queue) {
  WriteLockT Lock = acquireWriteLock();
  MGraphBuilder.startFusion(Queue);
}

void Scheduler::cancelFusion(QueueImplPtr Queue) {
  std::vector<Command *> ToEnqueue;
  {
    WriteLockT Lock = acquireWriteLock();
    MGraphBuilder.cancelFusion(Queue, ToEnqueue);
  }
  enqueueCommandForCG(nullptr, ToEnqueue);
}

EventImplPtr Scheduler::completeFusion(QueueImplPtr Queue,
                                       const property_list &PropList) {
  std::vector<Command *> ToEnqueue;
  EventImplPtr FusedEvent;
  {
    WriteLockT Lock = acquireWriteLock();
    FusedEvent = MGraphBuilder.completeFusion(Queue, ToEnqueue, PropList);
  }
  enqueueCommandForCG(nullptr, ToEnqueue);

  return FusedEvent;
}

bool Scheduler::isInFusionMode(QueueIdT queue) {
  ReadLockT Lock = acquireReadLock();
  return MGraphBuilder.isInFusionMode(queue);
}

void Scheduler::printFusionWarning(const std::string &Message) {
  if (SYCLConfig<SYCL_RT_WARNING_LEVEL>::get() > 0) {
    std::cerr << "WARNING: " << Message << "\n";
  }
}

KernelFusionCommand *Scheduler::isPartOfActiveFusion(Command *Cmd) {
  auto CmdType = Cmd->getType();
  switch (CmdType) {
  case Command::FUSION: {
    auto *FusionCmd = static_cast<KernelFusionCommand *>(Cmd);
    return (FusionCmd->isActive()) ? FusionCmd : nullptr;
  }
  case Command::RUN_CG: {
    auto *CGCmd = static_cast<ExecCGCommand *>(Cmd);
    return (CGCmd->MFusionCmd && CGCmd->MFusionCmd->isActive())
               ? CGCmd->MFusionCmd
               : nullptr;
  }
  default:
    return nullptr;
  }
}

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl