DPC++ Runtime
Runtime libraries for oneAPI DPC++
scheduler.hpp
Go to the documentation of this file.
1 //==-------------- scheduler.hpp - SYCL standard header file ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
14 #include <sycl/detail/cg.hpp>
15 
16 #include <cstddef>
17 #include <memory>
18 #include <queue>
19 #include <set>
20 #include <shared_mutex>
21 #include <unordered_map>
22 #include <unordered_set>
23 #include <vector>
24 
170 
171 // For testing purposes
172 class MockScheduler;
173 
174 namespace sycl {
175 inline namespace _V1 {
176 namespace detail {
177 class queue_impl;
178 class event_impl;
179 class context_impl;
180 class DispatchHostTask;
181 
182 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
183 using EventImplPtr = std::shared_ptr<detail::event_impl>;
184 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
185 using StreamImplPtr = std::shared_ptr<detail::stream_impl>;
186 
187 using QueueIdT = std::hash<std::shared_ptr<detail::queue_impl>>::result_type;
188 using CommandPtr = std::unique_ptr<Command>;
189 using FusionList = std::unique_ptr<KernelFusionCommand>;
190 using FusionMap = std::unordered_map<QueueIdT, FusionList>;
191 
198 struct MemObjRecord {
199  MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit,
200  LeavesCollection::AllocateDependencyF AllocateDependency)
201  : MReadLeaves{this, LeafLimit, AllocateDependency},
202  MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {}
203 
204  // Contains all allocation commands for the memory object.
205  std::vector<AllocaCommandBase *> MAllocaCommands;
206 
207  // Contains latest read only commands working with memory object.
209 
210  // Contains latest write commands working with memory object.
212 
213  // The context which has the latest state of the memory object.
215 
216  // The mode this object can be accessed with from the host context.
217  // Valid only if the current context is host.
219 
220  // The flag indicates that the content of the memory object was or will be
221  // modified. Used when deciding whether a copy back is needed.
222  bool MMemModified = false;
223 };
224 
259 //
260 // +----------+ +----------+ +----------+
261 // | | | | | |
262 // | Allocate |<----| Execute |<----| Execute |
263 // | | | | | |
264 // +----------+ +----------+ +----------+
265 //
293 // +----------+
294 // | |
295 // | D |
296 // | |
297 // +----------+
298 // / \
299 // / \
300 // v v
301 // +----------+ +----------+
302 // | | | |
303 // | B | | C |
304 // | | | |
305 // +----------+ +----------+
306 // \ /
307 // \ /
308 // v v
309 // +----------+
310 // | |
311 // | A |
312 // | |
313 // +----------+
363 class Scheduler {
364 public:
377  addCG(std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
378  sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr,
379  const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
380 
388 
395  void waitForEvent(const EventImplPtr &Event);
396 
419  bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock = true);
420 
432 
437  void releaseHostAccessor(Requirement *Req);
438 
440  static Scheduler &getInstance();
442  static bool isInstanceAlive();
443 
445 
447 
448  static MemObjRecord *getMemObjRecord(const Requirement *const Req);
449 
450  void deferMemObjRelease(const std::shared_ptr<detail::SYCLMemObjI> &MemObj);
451 
452  void startFusion(QueueImplPtr Queue);
453 
454  void cleanUpCmdFusion(sycl::detail::queue_impl *Queue);
455 
456  void cancelFusion(QueueImplPtr Queue);
457 
459 
460  bool isInFusionMode(QueueIdT Queue);
461 
462  Scheduler();
463  ~Scheduler();
466 
467  void enqueueCommandForCG(EventImplPtr NewEvent,
468  std::vector<Command *> &AuxilaryCmds,
469  BlockingT Blocking = NON_BLOCKING);
470 
471 protected:
472  using RWLockT = std::shared_timed_mutex;
473  using ReadLockT = std::shared_lock<RWLockT>;
474  using WriteLockT = std::unique_lock<RWLockT>;
475 
479 #ifdef _WIN32
480  WriteLockT Lock(MGraphLock, std::defer_lock);
481  while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
482  // Without the yield, this loop behaves like a busy-wait and occupies a
483  // whole CPU when multiple command groups are created in multiple host
484  // threads.
485  std::this_thread::yield();
486  }
487 #else
488  WriteLockT Lock(MGraphLock);
489  // On UNIX, executing the try_lock loop above leads to a deadlock in the
490  // implementation of lock and lock_shared, so a single lock is used here.
491 #endif // _WIN32
492  return Lock;
493  }
494 
498 #ifdef _WIN32
499  WriteLockT Lock(MFusionMapLock, std::defer_lock);
500  while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
501  // Without the yield, this loop behaves like a busy-wait and occupies a
502  // whole CPU when multiple command groups are created in multiple host
503  // threads.
504  std::this_thread::yield();
505  }
506 #else
508  // On UNIX, executing the try_lock loop above leads to a deadlock in the
509  // implementation of lock and lock_shared, so a single lock is used here.
510 #endif // _WIN32
511  return Lock;
512  }
513 
517 
521 
522  void cleanupCommands(const std::vector<Command *> &Cmds);
523 
525 
526  static void enqueueLeavesOfReqUnlocked(const Requirement *const Req,
527  ReadLockT &GraphReadLock,
528  std::vector<Command *> &ToCleanUp);
529 
530  static void
531  enqueueUnblockedCommands(const std::vector<EventImplPtr> &CmdsToEnqueue,
532  ReadLockT &GraphReadLock,
533  std::vector<Command *> &ToCleanUp);
534 
535  // May lock graph with read and write modes during execution.
536  void cleanupDeferredMemObjects(BlockingT Blocking);
537 
538  // POD struct to convey some additional information from GraphBuilder::addCG
539  // to the Scheduler to support kernel fusion.
544  };
545 
547  void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src);
549  EventImplPtr &Event, std::vector<std::shared_ptr<const void>> Resources);
550  void cleanupAuxiliaryResources(BlockingT Blocking);
551 
558  class GraphBuilder {
559  public:
560  GraphBuilder();
561 
574  std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
575  std::vector<Command *> &ToEnqueue,
576  sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr,
577  const std::vector<sycl::detail::pi::PiExtSyncPoint> &Dependencies = {});
578 
583  Command *addCGUpdateHost(std::unique_ptr<detail::CG> CommandGroup,
584  const QueueImplPtr &HostQueue,
585  std::vector<Command *> &ToEnqueue);
586 
590  Command *addCopyBack(Requirement *Req, std::vector<Command *> &ToEnqueue);
591 
596  std::vector<Command *> &ToEnqueue);
597 
599  void optimize();
600 
603  void optimize(const EventImplPtr &Event);
604 
605  void cleanupCommand(Command *Cmd, bool AllowUnsubmitted = false);
606 
613  void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue);
614 
618 
622  const Requirement *Req,
623  std::vector<Command *> &ToEnqueue);
624 
627 
630 
632  void removeRecordForMemObj(SYCLMemObjI *MemObject);
633 
635  void addNodeToLeaves(MemObjRecord *Record, Command *Cmd,
637  std::vector<Command *> &ToEnqueue);
638 
640  void updateLeaves(const std::set<Command *> &Cmds, MemObjRecord *Record,
642  std::vector<Command *> &ToCleanUp);
643 
653  Command *connectDepEvent(Command *const Cmd, const EventImplPtr &DepEvent,
654  const DepDesc &Dep,
655  std::vector<Command *> &ToCleanUp);
656 
657  void startFusion(QueueImplPtr Queue);
658 
661  void cleanUpCmdFusion(sycl::detail::queue_impl *Queue);
662 
663  void cancelFusion(QueueImplPtr Queue, std::vector<Command *> &ToEnqueue);
664 
666  std::vector<Command *> &ToEnqueue,
667  const property_list &);
668 
670 
671  std::vector<SYCLMemObjI *> MMemObjs;
672 
673  private:
683  Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
684  const QueueImplPtr &Queue,
685  std::vector<Command *> &ToEnqueue);
686 
687  // Inserts commands required to remap the memory object to its current host
688  // context so that the required access mode becomes valid.
689  Command *remapMemoryObject(MemObjRecord *Record, Requirement *Req,
690  AllocaCommandBase *HostAllocaCmd,
691  std::vector<Command *> &ToEnqueue);
692 
694  insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
695  const QueueImplPtr &Queue,
696  std::vector<Command *> &ToEnqueue);
697 
699  std::set<Command *> findDepsForReq(MemObjRecord *Record,
700  const Requirement *Req,
701  const ContextImplPtr &Context);
702 
703  EmptyCommand *addEmptyCmd(Command *Cmd,
704  const std::vector<Requirement *> &Req,
705  const QueueImplPtr &Queue,
706  Command::BlockReason Reason,
707  std::vector<Command *> &ToEnqueue,
708  const bool AddDepsToLeaves = true);
709 
710  void createGraphForCommand(Command *NewCmd, CG &CG, bool isInteropTask,
711  std::vector<Requirement *> &Reqs,
712  const std::vector<detail::EventImplPtr> &Events,
713  QueueImplPtr Queue,
714  std::vector<Command *> &ToEnqueue);
715 
716  protected:
719 
722  const Requirement *Req,
723  const ContextImplPtr &Context,
724  bool AllowConst = true);
725 
726  friend class Command;
727 
728  private:
729  friend class ::MockScheduler;
730 
735  getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
736  const QueueImplPtr &Queue,
737  std::vector<Command *> &ToEnqueue);
738 
739  void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);
740 
741  FusionMap::iterator findFusionList(QueueIdT Id) { // Looks up Id in MFusionMap.
742  return MFusionMap.find(Id); // Yields MFusionMap.end() when Id has no entry.
743  }
744 
745  void removeNodeFromGraph(Command *Node, std::vector<Command *> &ToEnqueue);
746 
749  std::queue<Command *> MCmdsToVisit;
751  std::vector<Command *> MVisitedCmds;
752 
755  FusionMap MFusionMap;
756 
761  void printGraphAsDot(const char *ModeName);
762  enum PrintOptions { // Presumably indexes MPrintOptionsArray, one bool per enumerator - confirm in the .cpp.
763  BeforeAddCG = 0,
764  AfterAddCG,
765  BeforeAddCopyBack,
766  AfterAddCopyBack,
767  BeforeAddHostAcc,
768  AfterAddHostAcc,
769  AfterFusionComplete,
770  AfterFusionCancel,
771  Size // Count of the enumerators above; used as the length of MPrintOptionsArray.
772  };
773  std::array<bool, PrintOptions::Size> MPrintOptionsArray{false};
774  };
775 
849  public:
857  static void waitForEvent(const EventImplPtr &Event,
858  ReadLockT &GraphReadLock,
859  std::vector<Command *> &ToCleanUp,
860  bool LockTheLock = true);
861 
871  static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock,
872  EnqueueResultT &EnqueueResult,
873  std::vector<Command *> &ToCleanUp,
874  Command *RootCommand,
875  BlockingT Blocking = NON_BLOCKING);
876 
889  static bool handleBlockingCmd(Command *Cmd, EnqueueResultT &EnqueueResult,
890  Command *RootCommand, BlockingT Blocking);
891  };
892 
901  void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock);
902  bool checkLeavesCompletion(MemObjRecord *Record);
903 
907 
908  std::vector<Command *> MDeferredCleanupCommands;
910 
911  std::vector<std::shared_ptr<SYCLMemObjI>> MDeferredMemObjRelease;
913 
914  std::unordered_map<EventImplPtr, std::vector<std::shared_ptr<const void>>>
917 
919 
920  friend class Command;
921  friend class DispatchHostTask;
922  friend class queue_impl;
923  friend class event_impl;
924  friend class ::MockScheduler;
925 
926 private:
927  static void printFusionWarning(const std::string &Message);
928 
929  static KernelFusionCommand *isPartOfActiveFusion(Command *Cmd);
930 };
931 
932 } // namespace detail
933 } // namespace _V1
934 } // namespace sycl
Base class for memory allocation commands.
Definition: commands.hpp:452
Base class for all types of command groups.
Definition: cg.hpp:53
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:102
The empty command does nothing during enqueue.
Definition: commands.hpp:410
The KernelFusionCommand is placed in the execution graph together with the individual kernels of the ...
Definition: commands.hpp:718
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
std::function< void(Command *, Command *, MemObjRecord *, EnqueueListT &)> AllocateDependencyF
void optimize(const EventImplPtr &Event)
[Provisional] Optimizes subgraph that consists of command associated with Event passed and its depend...
void cleanupCommand(Command *Cmd, bool AllowUnsubmitted=false)
std::vector< SYCLMemObjI * > MMemObjs
Definition: scheduler.hpp:671
void decrementLeafCountersForRecord(MemObjRecord *Record)
Decrements leaf counters for all leaves of the record.
void optimize()
[Provisional] Optimizes the whole graph.
MemObjRecord * getMemObjRecord(SYCLMemObjI *MemObject)
EventImplPtr completeFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue, const property_list &)
Command * addHostAccessor(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to create a host accessor.
void cleanupCommandsForRecord(MemObjRecord *Record)
Removes commands that use the given MemObjRecord from the graph.
GraphBuildResult addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, std::vector< Command * > &ToEnqueue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers command group and adds it to the dependency graph.
void removeRecordForMemObj(SYCLMemObjI *MemObject)
Removes the MemObjRecord for the memory object passed.
Command * addCopyBack(Requirement *Req, std::vector< Command * > &ToEnqueue)
Enqueues a command to update memory to the latest state.
Command * connectDepEvent(Command *const Cmd, const EventImplPtr &DepEvent, const DepDesc &Dep, std::vector< Command * > &ToCleanUp)
Perform connection of events in multiple contexts.
Command * addCGUpdateHost(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &HostQueue, std::vector< Command * > &ToEnqueue)
Registers a command group that updates host memory to the latest state.
MemObjRecord * getOrInsertMemObjRecord(const QueueImplPtr &Queue, const Requirement *Req, std::vector< Command * > &ToEnqueue)
void cancelFusion(QueueImplPtr Queue, std::vector< Command * > &ToEnqueue)
void addNodeToLeaves(MemObjRecord *Record, Command *Cmd, access::mode AccessMode, std::vector< Command * > &ToEnqueue)
Adds new command to leaves if needed.
void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue)
Reschedules the command passed using Queue provided.
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Clean up the internal fusion commands held for the given queue.
AllocaCommandBase * findAllocaForReq(MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context, bool AllowConst=true)
Searches for suitable alloca in memory record.
DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record)
Finds a command dependency corresponding to the record.
void updateLeaves(const std::set< Command * > &Cmds, MemObjRecord *Record, access::mode AccessMode, std::vector< Command * > &ToCleanUp)
Removes commands from leaves.
Graph Processor provides interfaces for enqueueing commands and their dependencies to the underlying ...
Definition: scheduler.hpp:848
static void waitForEvent(const EventImplPtr &Event, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp, bool LockTheLock=true)
Waits until the command associated with the passed Event is completed.
static bool enqueueCommand(Command *Cmd, ReadLockT &GraphReadLock, EnqueueResultT &EnqueueResult, std::vector< Command * > &ToCleanUp, Command *RootCommand, BlockingT Blocking=NON_BLOCKING)
Enqueues the command and all its dependencies.
static bool handleBlockingCmd(Command *Cmd, EnqueueResultT &EnqueueResult, Command *RootCommand, BlockingT Blocking)
Check if successfully enqueued command is expected to be blocking for the dependent commands before i...
DPC++ graph scheduler class.
Definition: scheduler.hpp:363
ReadLockT acquireFusionReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map.
Definition: scheduler.hpp:520
EventImplPtr addCopyBack(Requirement *Req)
Registers a command group that copies the most recent memory to the memory pointed to by the requirement.
Definition: scheduler.cpp:222
static void enqueueUnblockedCommands(const std::vector< EventImplPtr > &CmdsToEnqueue, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
Definition: scheduler.cpp:380
ReadLockT acquireReadLock()
Provides shared access to std::shared_timed_mutex object with deadlock avoidance.
Definition: scheduler.hpp:516
void waitForEvent(const EventImplPtr &Event)
Waits for the event.
Definition: scheduler.cpp:268
EventImplPtr addHostAccessor(Requirement *Req)
Adds nodes to the graph that update the requirement with the pointer to the host memory.
Definition: scheduler.cpp:306
std::unordered_map< EventImplPtr, std::vector< std::shared_ptr< const void > > > MAuxiliaryResources
Definition: scheduler.hpp:915
void registerAuxiliaryResources(EventImplPtr &Event, std::vector< std::shared_ptr< const void >> Resources)
Definition: scheduler.cpp:585
void cleanupAuxiliaryResources(BlockingT Blocking)
Definition: scheduler.cpp:592
std::unique_lock< RWLockT > WriteLockT
Definition: scheduler.hpp:474
EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &)
Definition: scheduler.cpp:630
QueueImplPtr getDefaultHostQueue()
Definition: scheduler.hpp:444
std::shared_timed_mutex RWLockT
Definition: scheduler.hpp:472
void cleanupDeferredMemObjects(BlockingT Blocking)
Definition: scheduler.cpp:513
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, ReadLockT &GraphReadLock, std::vector< Command * > &ToCleanUp)
Definition: scheduler.cpp:361
void enqueueCommandForCG(EventImplPtr NewEvent, std::vector< Command * > &AuxilaryCmds, BlockingT Blocking=NON_BLOCKING)
Definition: scheduler.cpp:165
bool isInFusionMode(QueueIdT Queue)
Definition: scheduler.cpp:644
void cancelFusion(QueueImplPtr Queue)
Definition: scheduler.cpp:620
std::shared_lock< RWLockT > ReadLockT
Definition: scheduler.hpp:473
std::vector< std::shared_ptr< SYCLMemObjI > > MDeferredMemObjRelease
Definition: scheduler.hpp:911
void startFusion(QueueImplPtr Queue)
Definition: scheduler.cpp:607
bool checkLeavesCompletion(MemObjRecord *Record)
Definition: scheduler.cpp:28
static MemObjRecord * getMemObjRecord(const Requirement *const Req)
Definition: scheduler.cpp:427
void releaseHostAccessor(Requirement *Req)
Unblocks operations with the memory object.
Definition: scheduler.cpp:345
void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock)
This function waits on all of the graph leaves which somehow use the memory object which is represent...
Definition: scheduler.cpp:44
static Scheduler & getInstance()
Definition: scheduler.cpp:260
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Definition: scheduler.cpp:613
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group and adds it to the dependency graph.
Definition: scheduler.cpp:94
void takeAuxiliaryResources(const EventImplPtr &Dst, const EventImplPtr &Src)
Assign Src's auxiliary resources to Dst.
Definition: scheduler.cpp:573
void cleanupCommands(const std::vector< Command * > &Cmds)
Definition: scheduler.cpp:431
const QueueImplPtr & getDefaultHostQueue() const
Definition: scheduler.hpp:446
void NotifyHostTaskCompletion(Command *Cmd)
Definition: scheduler.cpp:471
WriteLockT acquireWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance.
Definition: scheduler.hpp:478
bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock=true)
Removes buffer from the graph.
Definition: scheduler.cpp:278
WriteLockT acquireFusionWriteLock()
Provides exclusive access to std::shared_timed_mutex object with deadlock avoidance to the Fusion map...
Definition: scheduler.hpp:497
std::vector< Command * > MDeferredCleanupCommands
Definition: scheduler.hpp:908
void deferMemObjRelease(const std::shared_ptr< detail::SYCLMemObjI > &MemObj)
Definition: scheduler.cpp:500
void releaseResources(BlockingT Blocking=BlockingT::BLOCKING)
Definition: scheduler.cpp:408
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:119
std::unordered_map< QueueIdT, FusionList > FusionMap
Definition: scheduler.hpp:190
std::hash< std::shared_ptr< detail::queue_impl > >::result_type QueueIdT
Definition: scheduler.hpp:187
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:33
std::unique_ptr< KernelFusionCommand > FusionList
Definition: scheduler.hpp:189
std::shared_ptr< detail::stream_impl > StreamImplPtr
Definition: commands.hpp:45
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:35
std::unique_ptr< Command > CommandPtr
Definition: scheduler.hpp:188
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Definition: accessor.hpp:3233
Definition: access.hpp:18
Dependency between two commands.
Definition: commands.hpp:76
Result of command enqueueing.
Definition: commands.hpp:57
Memory Object Record.
Definition: scheduler.hpp:198
MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency)
Definition: scheduler.hpp:199
std::vector< AllocaCommandBase * > MAllocaCommands
Definition: scheduler.hpp:205