DPC++ Runtime
Runtime libraries for oneAPI DPC++
scheduler.hpp
Go to the documentation of this file.
1 //==-------------- scheduler.hpp - SYCL standard header file ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/sycl/detail/cg.hpp>
15 
16 #include <cstddef>
17 #include <memory>
18 #include <queue>
19 #include <set>
20 #include <shared_mutex>
21 #include <unordered_map>
22 #include <unordered_set>
23 #include <vector>
24 
170 
171 // For testing purposes
172 class MockScheduler;
173 
// NOTE(review): the enclosing __SYCL_INLINE_NAMESPACE(cl) opening (listing
// line 174) is elided in this extract; the matching closer is the
// "} // __SYCL_INLINE_NAMESPACE(cl)" brace at the end of the file.
175 namespace sycl {
176 namespace detail {
177 
// Forward declarations of implementation classes; their full definitions
// live in the corresponding *_impl headers.
178 class queue_impl;
179 class event_impl;
180 class context_impl;
181 class DispatchHostTask;
182 
// Shared-ownership aliases used throughout the scheduler interfaces.
183 using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
184 using EventImplPtr = std::shared_ptr<detail::event_impl>;
185 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
186 
// Memory Object Record: per-memory-object state tracked by the scheduler —
// allocation commands, leaf read/write commands, the context holding the
// latest copy, and host-access/modification flags.
// NOTE(review): this extract elides three member declarations that the
// cross-reference index confirms exist here:
//   listing line 203: LeavesCollection MReadLeaves;
//   listing line 206: LeavesCollection MWriteLeaves;
//   listing line 209: ContextImplPtr MCurContext;
// The constructor below initializes exactly those members.
193 struct MemObjRecord {
194  MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit,
195  LeavesCollection::AllocateDependencyF AllocateDependency)
196  : MReadLeaves{this, LeafLimit, AllocateDependency},
197  MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {}
198 
199  // Contains all allocation commands for the memory object.
200  std::vector<AllocaCommandBase *> MAllocaCommands;
201 
202  // Contains latest read only commands working with memory object.
204 
205  // Contains latest write commands working with memory object.
207 
208  // The context which has the latest state of the memory object.
210 
211  // The mode this object can be accessed with from the host context.
212  // Valid only if the current context is host.
213  access::mode MHostAccess = access::mode::read_write;
214 
215  // The flag indicates that the content of the memory object was/will be
216  // modified. Used while deciding if copy back needed.
217  bool MMemModified = false;
218 };
219 
254 //
255 // +----------+ +----------+ +----------+
256 // | | | | | |
257 // | Allocate |<----| Execute |<----| Execute |
258 // | | | | | |
259 // +----------+ +----------+ +----------+
260 //
288 // +----------+
289 // | |
290 // | D |
291 // | |
292 // +----------+
293 // / \
294 // / \
295 // v v
296 // +----------+ +----------+
297 // | | | |
298 // | B | | C |
299 // | | | |
300 // +----------+ +----------+
301 // \ /
302 // \ /
303 // v v
304 // +----------+
305 // | |
306 // | A |
307 // | |
308 // +----------+
// DPC++ graph scheduler: singleton that builds and executes the dependency
// graph of commands over memory objects. Public entry points register
// command groups / copy-backs / host accessors and return events; the nested
// GraphBuilder maintains the graph, and GraphProcessor (header elided in
// this extract, see notes below) enqueues it.
358 class Scheduler {
359 public:
// Registers a command group and returns the event callers wait on.
366  EventImplPtr addCG(std::unique_ptr<detail::CG> CommandGroup,
367  QueueImplPtr Queue);
368 
375  EventImplPtr addCopyBack(Requirement *Req);
376 
383  void waitForEvent(EventImplPtr Event);
384 
400  void removeMemoryObject(detail::SYCLMemObjI *MemObj);
401 
407  void cleanupFinishedCommands(EventImplPtr FinishedEvent);
408 
419  EventImplPtr addHostAccessor(Requirement *Req);
420 
425  void releaseHostAccessor(Requirement *Req);
426 
// Singleton accessor.
428  static Scheduler &getInstance();
429 
435  void allocateStreamBuffers(stream_impl *, size_t, size_t);
436 
440  void deallocateStreamBuffers(stream_impl *);
441 
442  QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; }
443 
444  static MemObjRecord *getMemObjRecord(const Requirement *const Req);
445 
446  Scheduler();
447  ~Scheduler();
448 
449 protected:
450  // TODO: after switching to C++17, change std::shared_timed_mutex to
451  // std::shared_mutex
452  using RWLockT = std::shared_timed_mutex;
453  using ReadLockT = std::shared_lock<RWLockT>;
454  using WriteLockT = std::unique_lock<RWLockT>;
455 
460  void acquireWriteLock(WriteLockT &Lock);
461 
462  void cleanupCommands(const std::vector<Command *> &Cmds);
463 
464  static void enqueueLeavesOfReqUnlocked(const Requirement *const Req,
465  std::vector<Command *> &ToCleanUp);
466 
// Graph builder: constructs/updates the command dependency graph for the
// scheduler. Methods that create commands append newly-created commands to
// the caller-supplied ToEnqueue vector rather than enqueueing directly.
473  class GraphBuilder {
474  public:
475  GraphBuilder();
476 
482  Command *addCG(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
483  std::vector<Command *> &ToEnqueue);
484 
489  Command *addCGUpdateHost(std::unique_ptr<detail::CG> CommandGroup,
490  QueueImplPtr HostQueue,
491  std::vector<Command *> &ToEnqueue);
492 
496  Command *addCopyBack(Requirement *Req, std::vector<Command *> &ToEnqueue);
497 
501  Command *addHostAccessor(Requirement *Req,
502  std::vector<Command *> &ToEnqueue);
503 
505  void optimize();
506 
509  void optimize(EventImplPtr Event);
510 
511  void cleanupCommand(Command *Cmd);
512 
515  void cleanupFinishedCommands(
516  Command *FinishedCmd,
517  std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &,
518  std::vector<std::shared_ptr<const void>> &);
519 
526  void rescheduleCommand(Command *Cmd, QueueImplPtr Queue);
527 
530  MemObjRecord *getMemObjRecord(SYCLMemObjI *MemObject);
531 
534  MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue,
535  const Requirement *Req,
536  std::vector<Command *> &ToEnqueue);
537 
539  void decrementLeafCountersForRecord(MemObjRecord *Record);
540 
542  void cleanupCommandsForRecord(
543  MemObjRecord *Record,
544  std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &,
545  std::vector<std::shared_ptr<const void>> &);
546 
548  void removeRecordForMemObj(SYCLMemObjI *MemObject);
549 
551  void addNodeToLeaves(MemObjRecord *Record, Command *Cmd,
552  access::mode AccessMode,
553  std::vector<Command *> &ToEnqueue);
554 
556  void updateLeaves(const std::set<Command *> &Cmds, MemObjRecord *Record,
557  access::mode AccessMode,
558  std::vector<Command *> &ToCleanUp);
559 
569  Command *connectDepEvent(Command *const Cmd, EventImplPtr DepEvent,
570  const DepDesc &Dep,
571  std::vector<Command *> &ToCleanUp);
572 
573  std::vector<SYCLMemObjI *> MMemObjs;
574 
575  private:
585  Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
586  const QueueImplPtr &Queue,
587  std::vector<Command *> &ToEnqueue);
588 
589  // Inserts commands required to remap the memory object to its current host
590  // context so that the required access mode becomes valid.
591  Command *remapMemoryObject(MemObjRecord *Record, Requirement *Req,
592  AllocaCommandBase *HostAllocaCmd,
593  std::vector<Command *> &ToEnqueue);
594 
// NOTE(review): the return type of insertUpdateHostReqCmd (listing line 595,
// per the index an UpdateHostRequirementCommand *) is elided in this extract.
596  insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
597  const QueueImplPtr &Queue,
598  std::vector<Command *> &ToEnqueue);
599 
601  std::set<Command *> findDepsForReq(MemObjRecord *Record,
602  const Requirement *Req,
603  const ContextImplPtr &Context);
604 
// SFINAE-constrained to T == Requirement (possibly cv-qualified).
605  template <typename T>
606  typename detail::enable_if_t<
607  std::is_same<typename std::remove_cv_t<T>, Requirement>::value,
608  EmptyCommand *>
609  addEmptyCmd(Command *Cmd, const std::vector<T *> &Req,
610  const QueueImplPtr &Queue, Command::BlockReason Reason,
611  std::vector<Command *> &ToEnqueue,
612  const bool AddDepsToLeaves = true);
613 
614  protected:
616  DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record);
617 
619  AllocaCommandBase *findAllocaForReq(MemObjRecord *Record,
620  const Requirement *Req,
621  const ContextImplPtr &Context);
622 
623  friend class Command;
624 
625  private:
626  friend class ::MockScheduler;
627 
// NOTE(review): the return type of getOrCreateAllocaForReq (listing lines
// 628-631) is elided in this extract.
632  getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
633  QueueImplPtr Queue,
634  std::vector<Command *> &ToEnqueue);
635 
636  void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);
637 
// Work queue / visited set used while traversing the command graph.
639  std::queue<Command *> MCmdsToVisit;
641  std::vector<Command *> MVisitedCmds;
// Debug dump of the graph in Graphviz DOT format; the enum selects at which
// scheduler phase a dump is produced, MPrintOptionsArray gates each phase.
646  void printGraphAsDot(const char *ModeName);
647  enum PrintOptions {
648  BeforeAddCG = 0,
649  AfterAddCG,
650  BeforeAddCopyBack,
651  AfterAddCopyBack,
652  BeforeAddHostAcc,
653  AfterAddHostAcc,
654  Size
655  };
656  std::array<bool, PrintOptions::Size> MPrintOptionsArray{false};
657  };
658 
// NOTE(review): the "class GraphProcessor {" header (listing line 731, per
// the index) is elided in this extract; the members below through the "};"
// at listing line 755 belong to that nested class.
732  public:
740  static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock,
741  std::vector<Command *> &ToCleanUp,
742  bool LockTheLock = true);
743 
752  static bool enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult,
753  std::vector<Command *> &ToCleanUp,
754  BlockingT Blocking = NON_BLOCKING);
755  };
756 
765  void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock);
766 
// NOTE(review): member declarations elided in this extract (confirmed by the
// index): GraphBuilder MGraphBuilder (767), RWLockT MGraphLock (768),
// std::mutex MDeferredCleanupMutex (771), QueueImplPtr DefaultHostQueue (773).
769 
770  std::vector<Command *> MDeferredCleanupCommands;
772 
774 
775  friend class Command;
776  friend class DispatchHostTask;
777  friend class queue_impl;
778  friend class event_impl;
779 
// Stream buffers structure: per-stream device buffers backing sycl::stream
// output, pooled by the scheduler (see StreamBuffersPool below).
783  struct StreamBuffers {
784  StreamBuffers(size_t StreamBufferSize, size_t FlushBufferSize)
785  // Initialize stream buffer with zeros, this is needed for two reasons:
786  // 1. We don't need to care about end of line when printing out
787  // streamed data.
788  // 2. Offset is properly initialized.
789  : Data(StreamBufferSize, 0),
790  Buf(Data.data(), range<1>(StreamBufferSize),
792  FlushBuf(range<1>(FlushBufferSize)) {
793  // Disable copy back on buffer destruction. Copy is scheduled as a host
794  // task which fires up as soon as kernel has completed execution.
795  Buf.set_write_back(false);
796  FlushBuf.set_write_back(false);
797  }
798 
799  // Vector on the host side which is used to initialize the stream
800  // buffer
801  std::vector<char> Data;
802 
// NOTE(review): the buffer members (buffer<char, 1> Buf at listing line 804
// and buffer<char, 1> FlushBuf at 807, per the index) are elided here.
803  // Stream buffer
805 
806  // Global flush buffer
808 
809  };
810 
811  friend class stream_impl;
812  friend void initStream(StreamImplPtr, QueueImplPtr);
813 
814  // Protects stream buffers pool
815  std::recursive_mutex StreamBuffersPoolMutex;
816 
817  // We need to store a pointer to the structure with stream buffers because we
818  // want to avoid a situation when buffers are destructed during destruction of
819  // the scheduler. Scheduler is a global object and it can be destructed after
820  // all device runtimes are unloaded. Destruction of the buffers at this stage
821  // will lead to a failure. In the correct program there will be sync points
822  // for all kernels and all allocated resources will be released by the
823  // scheduler. If program is not correct and doesn't have necessary sync point
824  // then warning will be issued.
825  std::unordered_map<stream_impl *, StreamBuffers *> StreamBuffersPool;
826 };
826 
827 } // namespace detail
828 } // namespace sycl
829 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::NON_BLOCKING
@ NON_BLOCKING
Definition: commands.hpp:47
cl::sycl::detail::Scheduler
DPC++ graph scheduler class.
Definition: scheduler.hpp:358
cl::sycl::detail::MemObjRecord::MemObjRecord
MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency)
Definition: scheduler.hpp:194
cl::sycl::detail::stream_impl
Definition: stream_impl.hpp:25
cl::sycl::property::buffer::use_host_ptr
Definition: buffer_properties.hpp:20
cg.hpp
cl::sycl::detail::MemObjRecord::MCurContext
ContextImplPtr MCurContext
Definition: scheduler.hpp:209
cl::sycl::detail::Command::BlockReason
BlockReason
Definition: commands.hpp:273
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:32
sycl_mem_obj_i.hpp
cl::sycl::detail::Scheduler::StreamBuffersPoolMutex
std::recursive_mutex StreamBuffersPoolMutex
Definition: scheduler.hpp:814
cl::sycl::detail::Scheduler::StreamBuffers
Stream buffers structure.
Definition: scheduler.hpp:783
cl::sycl::detail::Scheduler::StreamBuffers::Data
std::vector< char > Data
Definition: scheduler.hpp:801
cl::sycl::detail::BlockingT
BlockingT
Definition: commands.hpp:47
cl::sycl::detail::MemObjRecord
Memory Object Record.
Definition: scheduler.hpp:193
cl::sycl::detail::Scheduler::GraphBuilder
Graph builder class.
Definition: scheduler.hpp:473
cl::sycl::detail::MemObjRecord::MWriteLeaves
LeavesCollection MWriteLeaves
Definition: scheduler.hpp:206
leaves_collection.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::Scheduler::StreamBuffers::Buf
buffer< char, 1 > Buf
Definition: scheduler.hpp:804
cl::sycl::buffer< char, 1 >
cl::sycl::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:24
cl::sycl::detail::MemObjRecord::MReadLeaves
LeavesCollection MReadLeaves
Definition: scheduler.hpp:203
cl::sycl::detail::EnqueueResultT
Result of command enqueueing.
Definition: commands.hpp:50
cl::sycl::detail::LeavesCollection::AllocateDependencyF
std::function< void(Command *, Command *, MemObjRecord *, EnqueueListT &)> AllocateDependencyF
Definition: leaves_collection.hpp:46
cl::sycl::detail::AccessorImplHost
Definition: accessor_impl.hpp:74
cl::sycl::detail::DepDesc
Dependency between two commands.
Definition: commands.hpp:69
cl::sycl::detail::StreamImplPtr
std::shared_ptr< detail::stream_impl > StreamImplPtr
Definition: commands.hpp:38
cl::sycl::detail::EventImplPtr
std::shared_ptr< detail::event_impl > EventImplPtr
Definition: memory_manager.hpp:31
commands.hpp
cl::sycl::detail::AllocaCommandBase
Base class for memory allocation commands.
Definition: commands.hpp:364
cl::sycl::detail::Scheduler::DefaultHostQueue
QueueImplPtr DefaultHostQueue
Definition: scheduler.hpp:773
cl::sycl::detail::Command
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:95
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
cl::sycl::detail::Scheduler::getDefaultHostQueue
QueueImplPtr getDefaultHostQueue()
Definition: scheduler.hpp:442
cl::sycl::detail::Scheduler::GraphBuilder::MMemObjs
std::vector< SYCLMemObjI * > MMemObjs
Definition: scheduler.hpp:573
cl::sycl::detail::Scheduler::RWLockT
std::shared_timed_mutex RWLockT
Definition: scheduler.hpp:452
cl::sycl::detail::UpdateHostRequirementCommand
Definition: commands.hpp:574
cl::sycl::detail::queue_impl
Definition: queue_impl.hpp:54
cl::sycl::detail::Scheduler::MGraphBuilder
GraphBuilder MGraphBuilder
Definition: scheduler.hpp:767
cl::sycl::detail::QueueImplPtr
std::shared_ptr< detail::queue_impl > QueueImplPtr
Definition: memory_manager.hpp:30
cl::sycl::detail::Scheduler::MDeferredCleanupCommands
std::vector< Command * > MDeferredCleanupCommands
Definition: scheduler.hpp:770
cl::sycl::detail::Scheduler::StreamBuffersPool
std::unordered_map< stream_impl *, StreamBuffers * > StreamBuffersPool
Definition: scheduler.hpp:824
cl::sycl::detail::Scheduler::ReadLockT
std::shared_lock< RWLockT > ReadLockT
Definition: scheduler.hpp:453
cl::sycl::detail::Scheduler::GraphProcessor
Graph Processor provides interfaces for enqueueing commands and their dependencies to the underlying ...
Definition: scheduler.hpp:731
cl::sycl::detail::DispatchHostTask
Definition: commands.cpp:225
cl::sycl::detail::Scheduler::WriteLockT
std::unique_lock< RWLockT > WriteLockT
Definition: scheduler.hpp:454
cl::sycl::detail::Scheduler::StreamBuffers::StreamBuffers
StreamBuffers(size_t StreamBufferSize, size_t FlushBufferSize)
Definition: scheduler.hpp:784
cl::sycl::detail::Scheduler::MGraphLock
RWLockT MGraphLock
Definition: scheduler.hpp:768
cl::sycl::detail::MemObjRecord::MAllocaCommands
std::vector< AllocaCommandBase * > MAllocaCommands
Definition: scheduler.hpp:200
cl::sycl::detail::initStream
void initStream(StreamImplPtr Stream, QueueImplPtr Queue)
Definition: scheduler_helpers.cpp:19
cl::sycl::detail::event_impl
Definition: event_impl.hpp:33
cl::sycl::access::mode
mode
Definition: access.hpp:28
cl::sycl::detail::LeavesCollection
A wrapper for CircularBuffer class along with collection for host accessor's EmptyCommands.
Definition: leaves_collection.hpp:38
cl::sycl::detail::Scheduler::MDeferredCleanupMutex
std::mutex MDeferredCleanupMutex
Definition: scheduler.hpp:771
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::detail::EmptyCommand
The empty command does nothing during enqueue.
Definition: commands.hpp:323
cl::sycl::detail::SYCLMemObjI
Definition: sycl_mem_obj_i.hpp:28
cl::sycl::detail::Scheduler::StreamBuffers::FlushBuf
buffer< char, 1 > FlushBuf
Definition: scheduler.hpp:807
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12