//===-- graph_builder.cpp - SYCL Graph Builder ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "detail/config.hpp"
#include <CL/sycl/access/access.hpp>
#include <CL/sycl/exception.hpp>
#include <detail/context_impl.hpp>
#include <detail/event_impl.hpp>
#include <detail/queue_impl.hpp>
#include <detail/scheduler/scheduler.hpp>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <map>
#include <memory>
#include <queue>
#include <set>
#include <unordered_map>
#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

/// Checks whether two requirements overlap or not.
///
/// This information can be used to prove that executing two kernels that
/// work on different parts of the memory object in parallel is legal.
// TODO merge with LeavesCollection's version of doOverlap (see
// leaves_collection.cpp).
static bool doOverlap(const Requirement *LHS, const Requirement *RHS) {
  return (LHS->MOffsetInBytes + LHS->MAccessRange.size() * LHS->MElemSize >=
          RHS->MOffsetInBytes) ||
         (RHS->MOffsetInBytes + RHS->MAccessRange.size() * RHS->MElemSize >=
          LHS->MOffsetInBytes);
}
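// Note (editorial observation, not original commentary): if the first
// inequality fails, i.e. LHS ends before RHS starts, then RHS necessarily
// ends at or after the start of LHS, so one of the two disjuncts above always
// holds. The check is therefore conservative: it may report an overlap where
// none exists.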

static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) {
  // Consider two different host contexts to be the same to avoid additional
  // allocation on the host
  return LHS == RHS || (LHS->is_host() && RHS->is_host());
}

/// Checks if the current requirement is a requirement for a sub-buffer.
static bool IsSuitableSubReq(const Requirement *Req) {
  return Req->MIsSubBuffer;
}

/// Checks if the required access mode is allowed under the current one.
static bool isAccessModeAllowed(access::mode Required, access::mode Current) {
  switch (Current) {
  case access::mode::read:
    return (Required == Current);
  case access::mode::write:
    assert(false && "Write only access is expected to be mapped as read_write");
    return (Required == Current || Required == access::mode::discard_write);
  case access::mode::read_write:
  case access::mode::atomic:
  case access::mode::discard_write:
  case access::mode::discard_read_write:
    return true;
  }
  assert(false);
  return false;
}

/// Combines two access modes into a single one that allows both.
static access::mode combineAccessModes(access::mode A, access::mode B) {
  if (A == B)
    return A;

  if (A == access::mode::discard_write &&
      (B == access::mode::discard_read_write || B == access::mode::write))
    return B;

  if (B == access::mode::discard_write &&
      (A == access::mode::discard_read_write || A == access::mode::write))
    return A;

  return access::mode::read_write;
}

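// The SYCL_PRINT_EXECUTION_GRAPH environment variable enables dumping the
// execution graph in Graphviz .dot format. As the substring checks below
// show, its value is matched against the tokens "always", "before_addCG",
// "after_addCG", "before_addCopyBack", "after_addCopyBack",
// "before_addHostAcc" and "after_addHostAcc"; for example,
// SYCL_PRINT_EXECUTION_GRAPH=always turns on every dump point.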
Scheduler::GraphBuilder::GraphBuilder() {
  if (const char *EnvVarCStr = SYCLConfig<SYCL_PRINT_EXECUTION_GRAPH>::get()) {
    std::string GraphPrintOpts(EnvVarCStr);
    bool EnableAlways = GraphPrintOpts.find("always") != std::string::npos;

    if (GraphPrintOpts.find("before_addCG") != std::string::npos ||
        EnableAlways)
      MPrintOptionsArray[BeforeAddCG] = true;
    if (GraphPrintOpts.find("after_addCG") != std::string::npos || EnableAlways)
      MPrintOptionsArray[AfterAddCG] = true;
    if (GraphPrintOpts.find("before_addCopyBack") != std::string::npos ||
        EnableAlways)
      MPrintOptionsArray[BeforeAddCopyBack] = true;
    if (GraphPrintOpts.find("after_addCopyBack") != std::string::npos ||
        EnableAlways)
      MPrintOptionsArray[AfterAddCopyBack] = true;
    if (GraphPrintOpts.find("before_addHostAcc") != std::string::npos ||
        EnableAlways)
      MPrintOptionsArray[BeforeAddHostAcc] = true;
    if (GraphPrintOpts.find("after_addHostAcc") != std::string::npos ||
        EnableAlways)
      MPrintOptionsArray[AfterAddHostAcc] = true;
  }
}

static bool markNodeAsVisited(Command *Cmd, std::vector<Command *> &Visited) {
  assert(Cmd && "Cmd can't be nullptr");
  if (Cmd->MMarks.MVisited)
    return false;
  Cmd->MMarks.MVisited = true;
  Visited.push_back(Cmd);
  return true;
}

static void unmarkVisitedNodes(std::vector<Command *> &Visited) {
  for (Command *Cmd : Visited)
    Cmd->MMarks.MVisited = false;
}

static void handleVisitedNodes(std::vector<Command *> &Visited) {
  for (Command *Cmd : Visited) {
    if (Cmd->MMarks.MToBeDeleted) {
      Cmd->getEvent()->setCommand(nullptr);
      delete Cmd;
    } else
      Cmd->MMarks.MVisited = false;
  }
}

static void printDotRecursive(std::fstream &Stream,
                              std::vector<Command *> &Visited, Command *Cmd) {
  if (!markNodeAsVisited(Cmd, Visited))
    return;
  for (Command *User : Cmd->MUsers) {
    if (User)
      printDotRecursive(Stream, Visited, User);
  }
  Cmd->printDot(Stream);
}

void Scheduler::GraphBuilder::printGraphAsDot(const char *ModeName) {
  static size_t Counter = 0;
  std::string ModeNameStr(ModeName);
  std::string FileName =
      "graph_" + std::to_string(Counter) + ModeNameStr + ".dot";

  Counter++;

  std::fstream Stream(FileName, std::ios::out);
  Stream << "strict digraph {" << std::endl;

  MVisitedCmds.clear();

  for (SYCLMemObjI *MemObject : MMemObjs)
    for (Command *AllocaCmd : MemObject->MRecord->MAllocaCommands)
      printDotRecursive(Stream, MVisitedCmds, AllocaCmd);

  Stream << "}" << std::endl;

  unmarkVisitedNodes(MVisitedCmds);
}
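// The emitted graph_<N><mode>.dot files are plain Graphviz sources; they can
// be rendered with, for example, `dot -Tpng graph_0before_addCG.dot -o g.png`
// (an illustrative command, not something the runtime itself invokes).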

MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) {
  return MemObject->MRecord.get();
}

MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
    const QueueImplPtr &Queue, const Requirement *Req,
    std::vector<Command *> &ToEnqueue) {
  SYCLMemObjI *MemObject = Req->MSYCLMemObj;
  MemObjRecord *Record = getMemObjRecord(MemObject);

  if (nullptr != Record)
    return Record;

  const size_t LeafLimit = 8;
  LeavesCollection::AllocateDependencyF AllocateDependency =
      [this](Command *Dependant, Command *Dependency, MemObjRecord *Record,
             LeavesCollection::EnqueueListT &ToEnqueue) {
        // Add the old leaf as a dependency for the new one by duplicating one
        // of the requirements for the current record
        DepDesc Dep = findDepForRecord(Dependant, Record);
        Dep.MDepCommand = Dependency;
        std::vector<Command *> ToCleanUp;
        Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp);
        if (ConnectionCmd)
          ToEnqueue.push_back(ConnectionCmd);

        --(Dependency->MLeafCounter);
        if (Dependency->MLeafCounter == 0 &&
            Dependency->isSuccessfullyEnqueued() &&
            Dependency->supportsPostEnqueueCleanup())
          ToCleanUp.push_back(Dependency);
        for (Command *Cmd : ToCleanUp)
          cleanupCommand(Cmd);
      };

  const ContextImplPtr &InteropCtxPtr = Req->MSYCLMemObj->getInteropContext();
  if (InteropCtxPtr) {
    // The memory object has been constructed using an interoperability
    // constructor, which means that there is already an allocation (cl_mem)
    // in some context. Register this allocation in the SYCL graph.

    std::vector<sycl::device> Devices =
        InteropCtxPtr->get_info<info::context::devices>();
    assert(Devices.size() != 0);
    DeviceImplPtr Dev = detail::getSyclObjImpl(Devices[0]);

    // Since all Scheduler commands require a queue but only a context is
    // available here, create a dummy queue bound to the context and one of
    // its devices.
    QueueImplPtr InteropQueuePtr{new detail::queue_impl{
        Dev, InteropCtxPtr, /*AsyncHandler=*/{}, /*PropertyList=*/{}}};

    MemObject->MRecord.reset(
        new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
    getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
                            ToEnqueue);
  } else
    MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(),
                                              LeafLimit, AllocateDependency});

  MMemObjs.push_back(MemObject);
  return MemObject->MRecord.get();
}

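// Removes the given commands from the record's leaves. Read-only requirements
// leave the existing leaves untouched, since a new reader does not supersede
// previous readers or writers.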
void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
                                           MemObjRecord *Record,
                                           access::mode AccessMode,
                                           std::vector<Command *> &ToCleanUp) {

  const bool ReadOnlyReq = AccessMode == access::mode::read;
  if (ReadOnlyReq)
    return;

  for (Command *Cmd : Cmds) {
    bool WasLeaf = Cmd->MLeafCounter > 0;
    Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
    Cmd->MLeafCounter -= Record->MWriteLeaves.remove(Cmd);
    if (WasLeaf && Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued() &&
        Cmd->supportsPostEnqueueCleanup()) {
      ToCleanUp.push_back(Cmd);
    }
  }
}

void Scheduler::GraphBuilder::addNodeToLeaves(
    MemObjRecord *Record, Command *Cmd, access::mode AccessMode,
    std::vector<Command *> &ToEnqueue) {
  LeavesCollection &Leaves{AccessMode == access::mode::read
                               ? Record->MReadLeaves
                               : Record->MWriteLeaves};
  if (Leaves.push_back(Cmd, ToEnqueue))
    ++Cmd->MLeafCounter;
}

UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
    std::vector<Command *> &ToEnqueue) {
  AllocaCommandBase *AllocaCmd =
      findAllocaForReq(Record, Req, Queue->getContextImplPtr());
  assert(AllocaCmd && "There must be alloca for requirement!");
  UpdateHostRequirementCommand *UpdateCommand =
      new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData);
  // A copy of the requirement is needed because, once the host accessor is
  // destroyed, dependencies would become invalid if the requirement were
  // stored by pointer.
  const Requirement *StoredReq = UpdateCommand->getRequirement();

  std::set<Command *> Deps =
      findDepsForReq(Record, Req, Queue->getContextImplPtr());
  std::vector<Command *> ToCleanUp;
  for (Command *Dep : Deps) {
    Command *ConnCmd =
        UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd}, ToCleanUp);
    if (ConnCmd)
      ToEnqueue.push_back(ConnCmd);
  }
  updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
  addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue);
  for (Command *Cmd : ToCleanUp)
    cleanupCommand(Cmd);
  return UpdateCommand;
}

// Takes linked alloca commands. Makes AllocaCmdDst command active using a map
// or unmap operation.
static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
                                            AllocaCommandBase *AllocaCmdDst,
                                            access::mode MapMode) {
  assert(AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst &&
         "Expected linked alloca commands");
  assert(AllocaCmdSrc->MIsActive &&
         "Expected source alloca command to be active");

  if (AllocaCmdSrc->getQueue()->is_host()) {
    UnMapMemObject *UnMapCmd = new UnMapMemObject(
        AllocaCmdDst, *AllocaCmdDst->getRequirement(),
        &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getQueue());

    std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive);

    return UnMapCmd;
  }

  MapMemObject *MapCmd = new MapMemObject(
      AllocaCmdSrc, *AllocaCmdSrc->getRequirement(),
      &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getQueue(), MapMode);

  std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive);

  return MapCmd;
}

Command *Scheduler::GraphBuilder::insertMemoryMove(
    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
    std::vector<Command *> &ToEnqueue) {

  AllocaCommandBase *AllocaCmdDst =
      getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
  if (!AllocaCmdDst)
    throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);

  std::set<Command *> Deps =
      findDepsForReq(Record, Req, Queue->getContextImplPtr());
  Deps.insert(AllocaCmdDst);
  // Get the parent allocation of the sub-buffer to perform a full copy of the
  // whole buffer
  if (IsSuitableSubReq(Req)) {
    if (AllocaCmdDst->getType() == Command::CommandType::ALLOCA_SUB_BUF)
      AllocaCmdDst =
          static_cast<AllocaSubBufCommand *>(AllocaCmdDst)->getParentAlloca();
  }

  AllocaCommandBase *AllocaCmdSrc =
      findAllocaForReq(Record, Req, Record->MCurContext);
  if (!AllocaCmdSrc && IsSuitableSubReq(Req)) {
    // Since no alloca command for the sub-buffer requirement was found in the
    // current context, we need to find a parent alloca command for it (it must
    // be there)
    auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) {
      bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(),
                         Record->MCurContext) &&
                 // Looking for a parent buffer alloca command
                 AllocaCmd->getType() == Command::CommandType::ALLOCA;
      return Res;
    };
    const auto It =
        std::find_if(Record->MAllocaCommands.begin(),
                     Record->MAllocaCommands.end(), IsSuitableAlloca);
    AllocaCmdSrc = (Record->MAllocaCommands.end() != It) ? *It : nullptr;
  }
  if (!AllocaCmdSrc)
    throw runtime_error("Cannot find buffer allocation", PI_INVALID_VALUE);
  // Get the parent allocation of the sub-buffer to perform a full copy of the
  // whole buffer
  if (IsSuitableSubReq(Req)) {
    if (AllocaCmdSrc->getType() == Command::CommandType::ALLOCA_SUB_BUF)
      AllocaCmdSrc =
          static_cast<AllocaSubBufCommand *>(AllocaCmdSrc)->getParentAlloca();
    else if (AllocaCmdSrc->getSYCLMemObj() != Req->MSYCLMemObj)
      assert(false && "Inappropriate alloca command.");
  }

  Command *NewCmd = nullptr;

  if (AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst) {
    // Map write-only as read-write
    access::mode MapMode = Req->MAccessMode;
    if (MapMode == access::mode::write)
      MapMode = access::mode::read_write;
    NewCmd = insertMapUnmapForLinkedCmds(AllocaCmdSrc, AllocaCmdDst, MapMode);
    Record->MHostAccess = MapMode;
  } else {
    if ((Req->MAccessMode == access::mode::discard_write) ||
        (Req->MAccessMode == access::mode::discard_read_write)) {
      Record->MCurContext = Queue->getContextImplPtr();
      return nullptr;
    } else {
      // A full copy of the buffer is needed to avoid loss of data that may be
      // caused by copying a specific range from host to device and back.
      NewCmd =
          new MemCpyCommand(*AllocaCmdSrc->getRequirement(), AllocaCmdSrc,
                            *AllocaCmdDst->getRequirement(), AllocaCmdDst,
                            AllocaCmdSrc->getQueue(), AllocaCmdDst->getQueue());
    }
  }
  std::vector<Command *> ToCleanUp;
  for (Command *Dep : Deps) {
    Command *ConnCmd = NewCmd->addDep(
        DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst}, ToCleanUp);
    if (ConnCmd)
      ToEnqueue.push_back(ConnCmd);
  }
  updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
  addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue);
  for (Command *Cmd : ToCleanUp)
    cleanupCommand(Cmd);
  Record->MCurContext = Queue->getContextImplPtr();
  return NewCmd;
}

Command *Scheduler::GraphBuilder::remapMemoryObject(
    MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd,
    std::vector<Command *> &ToEnqueue) {
  assert(HostAllocaCmd->getQueue()->is_host() &&
         "Host alloca command expected");
  assert(HostAllocaCmd->MIsActive && "Active alloca command expected");

  AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd;
  assert(LinkedAllocaCmd && "Linked alloca command expected");

  std::set<Command *> Deps = findDepsForReq(Record, Req, Record->MCurContext);

  UnMapMemObject *UnMapCmd = new UnMapMemObject(
      LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
      &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue());

  // Map write-only as read-write
  access::mode MapMode = Req->MAccessMode;
  if (MapMode == access::mode::write)
    MapMode = access::mode::read_write;
  MapMemObject *MapCmd = new MapMemObject(
      LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
      &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode);

  std::vector<Command *> ToCleanUp;
  for (Command *Dep : Deps) {
    Command *ConnCmd = UnMapCmd->addDep(
        DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd}, ToCleanUp);
    if (ConnCmd)
      ToEnqueue.push_back(ConnCmd);
  }

  Command *ConnCmd = MapCmd->addDep(
      DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd}, ToCleanUp);
  if (ConnCmd)
    ToEnqueue.push_back(ConnCmd);

  updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
  addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue);
  for (Command *Cmd : ToCleanUp)
    cleanupCommand(Cmd);
  Record->MHostAccess = MapMode;
  return MapCmd;
}

// The function adds a copy operation of the most recent memory to the memory
// pointed to by Req.
Command *
Scheduler::GraphBuilder::addCopyBack(Requirement *Req,
                                     std::vector<Command *> &ToEnqueue) {
  QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue();
  SYCLMemObjI *MemObj = Req->MSYCLMemObj;
  MemObjRecord *Record = getMemObjRecord(MemObj);
  if (Record && MPrintOptionsArray[BeforeAddCopyBack])
    printGraphAsDot("before_addCopyBack");

  // Do nothing if there were no or only read operations with the memory object.
  if (nullptr == Record || !Record->MMemModified)
    return nullptr;

  std::set<Command *> Deps =
      findDepsForReq(Record, Req, HostQueue->getContextImplPtr());
  AllocaCommandBase *SrcAllocaCmd =
      findAllocaForReq(Record, Req, Record->MCurContext);

  auto MemCpyCmdUniquePtr = std::make_unique<MemCpyCommandHost>(
      *SrcAllocaCmd->getRequirement(), SrcAllocaCmd, *Req, &Req->MData,
      SrcAllocaCmd->getQueue(), std::move(HostQueue));

  if (!MemCpyCmdUniquePtr)
    throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);

  MemCpyCommandHost *MemCpyCmd = MemCpyCmdUniquePtr.release();

  std::vector<Command *> ToCleanUp;
  for (Command *Dep : Deps) {
    Command *ConnCmd = MemCpyCmd->addDep(
        DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp);
    if (ConnCmd)
      ToEnqueue.push_back(ConnCmd);
  }

  updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
  addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode, ToEnqueue);
  for (Command *Cmd : ToCleanUp)
    cleanupCommand(Cmd);
  if (MPrintOptionsArray[AfterAddCopyBack])
    printGraphAsDot("after_addCopyBack");
  return MemCpyCmd;
}

// The function implements SYCL host accessor logic: the host accessor
// should provide access to the buffer in user space.
Command *
Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
                                         std::vector<Command *> &ToEnqueue) {

  const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue();

  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
  if (MPrintOptionsArray[BeforeAddHostAcc])
    printGraphAsDot("before_addHostAccessor");
  markModifiedIfWrite(Record, Req);

  AllocaCommandBase *HostAllocaCmd =
      getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue);

  if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
              Record->MCurContext)) {
    if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
      remapMemoryObject(Record, Req, HostAllocaCmd, ToEnqueue);
  } else
    insertMemoryMove(Record, Req, HostQueue, ToEnqueue);

  Command *UpdateHostAccCmd =
      insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue);

  // An empty command is needed so that it stays blocked until the host
  // accessor is destroyed
  EmptyCommand *EmptyCmd =
      addEmptyCmd<Requirement>(UpdateHostAccCmd, {Req}, HostQueue,
                               Command::BlockReason::HostAccessor, ToEnqueue);

  Req->MBlockedCmd = EmptyCmd;

  if (MPrintOptionsArray[AfterAddHostAcc])
    printGraphAsDot("after_addHostAccessor");

  return UpdateHostAccCmd;
}

Command *Scheduler::GraphBuilder::addCGUpdateHost(
    std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr HostQueue,
    std::vector<Command *> &ToEnqueue) {

  auto UpdateHost = static_cast<CGUpdateHost *>(CommandGroup.get());
  Requirement *Req = UpdateHost->getReqToUpdate();

  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
  return insertMemoryMove(Record, Req, HostQueue, ToEnqueue);
}

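// Finds the set of dependencies for the requirement: the leaf commands of the
// record that access the same memory object and cannot be bypassed, i.e.
// whose accesses overlap the requirement, conflict with it in access mode,
// or were issued in a different context.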
std::set<Command *>
Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record,
                                        const Requirement *Req,
                                        const ContextImplPtr &Context) {
  std::set<Command *> RetDeps;
  std::vector<Command *> Visited;
  const bool ReadOnlyReq = Req->MAccessMode == access::mode::read;

  std::vector<Command *> ToAnalyze{Record->MWriteLeaves.toVector()};

  if (!ReadOnlyReq) {
    std::vector<Command *> V{Record->MReadLeaves.toVector()};

    ToAnalyze.insert(ToAnalyze.begin(), V.begin(), V.end());
  }

  while (!ToAnalyze.empty()) {
    Command *DepCmd = ToAnalyze.back();
    ToAnalyze.pop_back();

    std::vector<Command *> NewAnalyze;

    for (const DepDesc &Dep : DepCmd->MDeps) {
      if (Dep.MDepRequirement->MSYCLMemObj != Req->MSYCLMemObj)
        continue;

      bool CanBypassDep = false;
      // If both only read
      CanBypassDep |=
          Dep.MDepRequirement->MAccessMode == access::mode::read && ReadOnlyReq;

      // If they do not overlap
      CanBypassDep |= !doOverlap(Dep.MDepRequirement, Req);

      // Going through copying memory between contexts is not supported.
      if (Dep.MDepCommand)
        CanBypassDep &=
            sameCtx(Context, Dep.MDepCommand->getQueue()->getContextImplPtr());

      if (!CanBypassDep) {
        RetDeps.insert(DepCmd);
        // There is no need to analyze the dependencies of the command being
        // examined, as it is a dependency itself.
        NewAnalyze.clear();
        break;
      }

      if (markNodeAsVisited(Dep.MDepCommand, Visited))
        NewAnalyze.push_back(Dep.MDepCommand);
    }
    ToAnalyze.insert(ToAnalyze.end(), NewAnalyze.begin(), NewAnalyze.end());
  }
  unmarkVisitedNodes(Visited);
  return RetDeps;
}

// A helper function for finding a command dependency on a specific memory
// object
DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd,
                                                  MemObjRecord *Record) {
  for (const DepDesc &DD : Cmd->MDeps) {
    if (getMemObjRecord(DD.MDepRequirement->MSYCLMemObj) == Record) {
      return DD;
    }
  }
  assert(false && "No dependency found for a leaf of the record");
  return {nullptr, nullptr, nullptr};
}

// The function searches for the alloca command matching context and
// requirement.
AllocaCommandBase *
Scheduler::GraphBuilder::findAllocaForReq(MemObjRecord *Record,
                                          const Requirement *Req,
                                          const ContextImplPtr &Context) {
  auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) {
    bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context);
    if (IsSuitableSubReq(Req)) {
      const Requirement *TmpReq = AllocaCmd->getRequirement();
      Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF;
      Res &= TmpReq->MOffsetInBytes == Req->MOffsetInBytes;
      Res &= TmpReq->MSYCLMemObj->getSize() == Req->MSYCLMemObj->getSize();
    }
    return Res;
  };
  const auto It = std::find_if(Record->MAllocaCommands.begin(),
                               Record->MAllocaCommands.end(), IsSuitableAlloca);
  return (Record->MAllocaCommands.end() != It) ? *It : nullptr;
}

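// Determines whether allocations for this context may reuse a host pointer
// directly. The SYCL_HOST_UNIFIED_MEMORY environment variable overrides the
// per-device query: "0" forces the non-unified path for non-host contexts,
// while "1" forces the unified path.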
static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
  if (const char *HUMConfig = SYCLConfig<SYCL_HOST_UNIFIED_MEMORY>::get()) {
    if (std::strcmp(HUMConfig, "0") == 0)
      return Ctx->is_host();
    if (std::strcmp(HUMConfig, "1") == 0)
      return true;
  }
  for (const device &Device : Ctx->getDevices()) {
    if (!Device.get_info<info::device::host_unified_memory>())
      return false;
  }
  return true;
}

// The function searches for the alloca command matching context and
// requirement. If none exists, a new allocation command is created.
// Note that the creation of a new allocation command can lead to a change of
// the current context (Record->MCurContext).
AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
    MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue,
    std::vector<Command *> &ToEnqueue) {

  AllocaCommandBase *AllocaCmd =
      findAllocaForReq(Record, Req, Queue->getContextImplPtr());

  if (!AllocaCmd) {
    std::vector<Command *> ToCleanUp;
    if (IsSuitableSubReq(Req)) {
      // Get the parent requirement. It is hard to get the parent's range
      // right, so the full parent requirement has its range represented in
      // bytes.
      range<3> ParentRange{Req->MSYCLMemObj->getSize(), 1, 1};
      Requirement ParentRequirement(/*Offset*/ {0, 0, 0}, ParentRange,
                                    ParentRange, access::mode::read_write,
                                    Req->MSYCLMemObj, /*Dims*/ 1,
                                    /*Working with bytes*/ sizeof(char));

      auto *ParentAlloca =
          getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue);
      AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue,
                                          ToCleanUp);
    } else {
      const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
                                Req->MMemoryRange, access::mode::read_write,
                                Req->MSYCLMemObj, Req->MDims, Req->MElemSize,
                                0 /*MOffsetInBytes*/, false /*MIsSubBuffer*/);
      // User data can be reused for the first allocation. Do so if host
      // unified memory is supported, regardless of the access mode (the
      // pointer will be reused). For devices without host unified memory the
      // initialization will be performed as a write operation.
      // TODO the case where the first alloca is made with a discard mode and
      // the user pointer is read-only is still not handled: it leads to an
      // unnecessary copy on devices with host unified memory support.
      const bool HostUnifiedMemory =
          checkHostUnifiedMemory(Queue->getContextImplPtr());
      // TODO a cast is required here to get the necessary information without
      // breaking the ABI; replace it with the next major version.
      auto *MemObj = static_cast<SYCLMemObjT *>(Req->MSYCLMemObj);
      const bool InitFromUserData = Record->MAllocaCommands.empty() &&
                                    (HostUnifiedMemory || MemObj->isInterop());
      AllocaCommandBase *LinkedAllocaCmd = nullptr;

      // For the first allocation on a device without host unified memory we
      // might need to also create a host alloca right away in order to perform
      // the initial memory write.
      if (Record->MAllocaCommands.empty()) {
        if (!HostUnifiedMemory &&
            Req->MAccessMode != access::mode::discard_write &&
            Req->MAccessMode != access::mode::discard_read_write) {
          // There is no need to make a host allocation if the buffer is not
          // initialized with user data.
          if (MemObj->hasUserDataPtr()) {
            QueueImplPtr DefaultHostQueue =
                Scheduler::getInstance().getDefaultHostQueue();
            AllocaCommand *HostAllocaCmd = new AllocaCommand(
                DefaultHostQueue, FullReq, true /* InitFromUserData */,
                nullptr /* LinkedAllocaCmd */);
            Record->MAllocaCommands.push_back(HostAllocaCmd);
            Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue);
            ++(HostAllocaCmd->MLeafCounter);
            Record->MCurContext = DefaultHostQueue->getContextImplPtr();
          }
        }
      } else {
        // If it is not the first allocation, try to set up a link.
        // FIXME: Temporary limitation: linked alloca commands are not
        // supported for images because the map operation is not implemented
        // for them.
        if (Req->MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer)
          // The current limitation is to set up a link between the current
          // allocation and the new one. There could be situations where a
          // link could be set up with a non-current allocation, but that
          // would require a memory copy.
          // A link can only be set up between cl and host allocations.
          if (Queue->is_host() != Record->MCurContext->is_host()) {
            // Linked commands assume that the host allocation is reused by the
            // plugin runtime and that can lead to unnecessary copy overhead on
            // devices that do not support host unified memory. Do not link the
            // allocations in this case.
            // However, if the user explicitly requests use of pinned host
            // memory, map/unmap operations are expected to work faster than
            // read/write from/to an arbitrary host pointer. Link such commands
            // regardless of host unified memory support.
            bool PinnedHostMemory = MemObj->has_property<
                sycl::ext::oneapi::property::buffer::use_pinned_host_memory>();

            bool HostUnifiedMemoryOnNonHostDevice =
                Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext)
                                 : HostUnifiedMemory;
            if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) {
              AllocaCommandBase *LinkedAllocaCmdCand =
                  findAllocaForReq(Record, Req, Record->MCurContext);

              // Cannot set up a link if the candidate is already linked
              if (LinkedAllocaCmdCand &&
                  !LinkedAllocaCmdCand->MLinkedAllocaCmd) {
                LinkedAllocaCmd = LinkedAllocaCmdCand;
              }
            }
          }
      }

      AllocaCmd =
          new AllocaCommand(Queue, FullReq, InitFromUserData, LinkedAllocaCmd);

      // Update the linked command
      if (LinkedAllocaCmd) {
        Command *ConnCmd = AllocaCmd->addDep(
            DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(),
                    LinkedAllocaCmd},
            ToCleanUp);
        if (ConnCmd)
          ToEnqueue.push_back(ConnCmd);
        LinkedAllocaCmd->MLinkedAllocaCmd = AllocaCmd;

        // To ensure that the leader allocation is removed first
        ConnCmd = AllocaCmd->getReleaseCmd()->addDep(
            DepDesc(LinkedAllocaCmd->getReleaseCmd(),
                    AllocaCmd->getRequirement(), LinkedAllocaCmd),
            ToCleanUp);
        if (ConnCmd)
          ToEnqueue.push_back(ConnCmd);

        // A device allocation takes ownership of the host pointer during
        // construction; a host allocation does not. So the device allocation
        // should always be the active one here. Also, if the "follower"
        // command is a device one, the current context has to be changed to
        // the device one.
        if (Queue->is_host()) {
          AllocaCmd->MIsActive = false;
        } else {
          LinkedAllocaCmd->MIsActive = false;
          Record->MCurContext = Queue->getContextImplPtr();

          std::set<Command *> Deps =
              findDepsForReq(Record, Req, Queue->getContextImplPtr());
          for (Command *Dep : Deps) {
            Command *ConnCmd = AllocaCmd->addDep(
                DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp);
            if (ConnCmd)
              ToEnqueue.push_back(ConnCmd);
          }
          updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
          addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue);
        }
      }
    }

    Record->MAllocaCommands.push_back(AllocaCmd);
    Record->MWriteLeaves.push_back(AllocaCmd, ToEnqueue);
    ++(AllocaCmd->MLeafCounter);
    for (Command *Cmd : ToCleanUp)
      cleanupCommand(Cmd);
  }
  return AllocaCmd;
}

// The function sets the MemModified flag in the record if the requirement has
// write access.
void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record,
                                                  Requirement *Req) {
  switch (Req->MAccessMode) {
  case access::mode::write:
  case access::mode::read_write:
  case access::mode::discard_write:
  case access::mode::discard_read_write:
  case access::mode::atomic:
    Record->MMemModified = true;
    break;
  case access::mode::read:
    break;
  }
}

template <typename T>
typename detail::enable_if_t<
    std::is_same<typename std::remove_cv_t<T>, Requirement>::value,
    EmptyCommand *>
Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector<T *> &Reqs,
                                     const QueueImplPtr &Queue,
                                     Command::BlockReason Reason,
                                     std::vector<Command *> &ToEnqueue,
                                     const bool AddDepsToLeaves) {
  EmptyCommand *EmptyCmd =
      new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue());

  if (!EmptyCmd)
    throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);

  EmptyCmd->MIsBlockable = true;
  EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked;
  EmptyCmd->MBlockReason = Reason;

  for (T *Req : Reqs) {
    MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue);
    AllocaCommandBase *AllocaCmd =
        getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
    EmptyCmd->addRequirement(Cmd, AllocaCmd, Req);
  }
  // addRequirement above calls addDep, which already adds EmptyCmd as a user
  // of Cmd. The size of Reqs is not checked here, so assume it is possible
  // for no Reqs to be passed.
  if (!Reqs.size())
    Cmd->addUser(EmptyCmd);

  if (AddDepsToLeaves) {
    const std::vector<DepDesc> &Deps = Cmd->MDeps;
    std::vector<Command *> ToCleanUp;
    for (const DepDesc &Dep : Deps) {
      const Requirement *Req = Dep.MDepRequirement;
      MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);

      updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp);
      addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue);
    }
    for (Command *Cmd : ToCleanUp)
      cleanupCommand(Cmd);
  }

  return EmptyCmd;
}

static bool isInteropHostTask(const std::unique_ptr<ExecCGCommand> &Cmd) {
  if (Cmd->getCG().getType() != CG::CGTYPE::CodeplayHostTask)
    return false;

  const detail::CGHostTask &HT =
      static_cast<detail::CGHostTask &>(Cmd->getCG());

  return HT.MHostTask->isInteropTask();
}

static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
  std::unordered_map<SYCLMemObjI *, access::mode> CombinedModes;
  bool HasDuplicateMemObjects = false;
  for (const Requirement *Req : Reqs) {
    auto Result = CombinedModes.insert(
        std::make_pair(Req->MSYCLMemObj, Req->MAccessMode));
    if (!Result.second) {
      Result.first->second =
          combineAccessModes(Result.first->second, Req->MAccessMode);
      HasDuplicateMemObjects = true;
    }
  }

  if (!HasDuplicateMemObjects)
    return;
  for (Requirement *Req : Reqs) {
    Req->MAccessMode = CombinedModes[Req->MSYCLMemObj];
  }
}

Command *
Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
                               QueueImplPtr Queue,
                               std::vector<Command *> &ToEnqueue) {
  std::vector<Requirement *> &Reqs = CommandGroup->MRequirements;
  const std::vector<detail::EventImplPtr> &Events = CommandGroup->MEvents;
  const CG::CGTYPE CGType = CommandGroup->getType();

  auto NewCmd = std::make_unique<ExecCGCommand>(std::move(CommandGroup), Queue);
  if (!NewCmd)
    throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);

  if (MPrintOptionsArray[BeforeAddCG])
    printGraphAsDot("before_addCG");

  // If there are multiple requirements for the same memory object, its
  // AllocaCommand creation will be dependent on the access mode of the first
  // requirement. Combine these access modes to take all of them into account.
  combineAccessModesOfReqs(Reqs);
  std::vector<Command *> ToCleanUp;
  for (Requirement *Req : Reqs) {
    MemObjRecord *Record = nullptr;
    AllocaCommandBase *AllocaCmd = nullptr;

    bool isSameCtx = false;

    {
      const QueueImplPtr &QueueForAlloca =
          isInteropHostTask(NewCmd)
              ? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue
              : Queue;

      Record = getOrInsertMemObjRecord(QueueForAlloca, Req, ToEnqueue);
      markModifiedIfWrite(Record, Req);

      AllocaCmd =
          getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue);

      isSameCtx =
          sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext);
    }

    // If there is an alloca command, check whether the latest memory is in
    // the required context.
    if (isSameCtx) {
      // If the memory is already in the required host context, check if the
      // required access mode is valid; remap if not.
      if (Record->MCurContext->is_host() &&
          !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
        remapMemoryObject(Record, Req, AllocaCmd, ToEnqueue);
    } else {
      // Memory cannot be copied directly from one OpenCL device to another -
      // create two copies: device->host and host->device.
      bool NeedMemMoveToHost = false;
      auto MemMoveTargetQueue = Queue;

      if (isInteropHostTask(NewCmd)) {
        const detail::CGHostTask &HT =
            static_cast<detail::CGHostTask &>(NewCmd->getCG());

        if (HT.MQueue->getContextImplPtr() != Record->MCurContext) {
          NeedMemMoveToHost = true;
          MemMoveTargetQueue = HT.MQueue;
        }
      } else if (!Queue->is_host() && !Record->MCurContext->is_host())
        NeedMemMoveToHost = true;

      if (NeedMemMoveToHost)
        insertMemoryMove(Record, Req,
                         Scheduler::getInstance().getDefaultHostQueue(),
                         ToEnqueue);
      insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue);
    }
    std::set<Command *> Deps =
        findDepsForReq(Record, Req, Queue->getContextImplPtr());

    for (Command *Dep : Deps) {
      Command *ConnCmd =
          NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}, ToCleanUp);
      if (ConnCmd)
        ToEnqueue.push_back(ConnCmd);
    }
  }

  // Set the new command as a user of its dependencies and update the leaves.
  // Node dependencies can be modified further when adding the node to leaves,
  // so iterate over their copy.
  // FIXME employ a reference here to eliminate copying of a vector
  std::vector<DepDesc> Deps = NewCmd->MDeps;
  for (DepDesc &Dep : Deps) {
    const Requirement *Req = Dep.MDepRequirement;
    MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
    updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode, ToCleanUp);
    addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue);
  }

  // Register all the events as dependencies
  for (detail::EventImplPtr e : Events) {
    if (Command *ConnCmd = NewCmd->addDep(e, ToCleanUp))
      ToEnqueue.push_back(ConnCmd);
  }

  if (CGType == CG::CGTYPE::CodeplayHostTask)
    NewCmd->MEmptyCmd =
        addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, Queue,
                    Command::BlockReason::HostTask, ToEnqueue);

  if (MPrintOptionsArray[AfterAddCG])
    printGraphAsDot("after_addCG");

  for (Command *Cmd : ToCleanUp)
    cleanupCommand(Cmd);
  return NewCmd.release();
}

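// Decrements the leaf counters of every leaf of the record and cleans up
// commands whose counters drop to zero and that are eligible for post-enqueue
// cleanup.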
void Scheduler::GraphBuilder::decrementLeafCountersForRecord(
    MemObjRecord *Record) {
  for (Command *Cmd : Record->MReadLeaves) {
    --(Cmd->MLeafCounter);
    if (Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued() &&
        Cmd->supportsPostEnqueueCleanup())
      cleanupCommand(Cmd);
  }
  for (Command *Cmd : Record->MWriteLeaves) {
    --(Cmd->MLeafCounter);
    if (Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued() &&
        Cmd->supportsPostEnqueueCleanup())
      cleanupCommand(Cmd);
  }
}

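// Removes the record's alloca commands and all commands that depend on them
// from the graph, transferring ownership of the streams and auxiliary
// resources held by the removed commands to the caller for deferred
// deallocation.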
void Scheduler::GraphBuilder::cleanupCommandsForRecord(
    MemObjRecord *Record,
    std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
    std::vector<std::shared_ptr<const void>> &AuxResourcesToDeallocate) {
  std::vector<AllocaCommandBase *> &AllocaCommands = Record->MAllocaCommands;
  if (AllocaCommands.empty())
    return;

  assert(MCmdsToVisit.empty());
  MVisitedCmds.clear();

  // First, mark all allocas for deletion and their direct users for traversal
  // Dependencies of the users will be cleaned up during the traversal
  for (Command *AllocaCmd : AllocaCommands) {
    markNodeAsVisited(AllocaCmd, MVisitedCmds);

    for (Command *UserCmd : AllocaCmd->MUsers)
      // Linked alloca cmd may be in users of this alloca. We're not going to
      // visit it.
      if (UserCmd->getType() != Command::CommandType::ALLOCA)
        MCmdsToVisit.push(UserCmd);
      else
        markNodeAsVisited(UserCmd, MVisitedCmds);

    AllocaCmd->MMarks.MToBeDeleted = true;
    // These commands will be deleted later, clear users now to avoid
    // updating them during edge removal
    AllocaCmd->MUsers.clear();
  }

  // Make sure the linked allocas are marked visited by the previous walk.
  // Remove allocation commands from the users of their dependencies.
  for (AllocaCommandBase *AllocaCmd : AllocaCommands) {
    AllocaCommandBase *LinkedCmd = AllocaCmd->MLinkedAllocaCmd;

    if (LinkedCmd) {
      assert(LinkedCmd->MMarks.MVisited);
    }

    for (DepDesc &Dep : AllocaCmd->MDeps)
      if (Dep.MDepCommand)
        Dep.MDepCommand->MUsers.erase(AllocaCmd);
  }

  // Traverse the graph using BFS
  while (!MCmdsToVisit.empty()) {
    Command *Cmd = MCmdsToVisit.front();
    MCmdsToVisit.pop();

    if (!markNodeAsVisited(Cmd, MVisitedCmds))
      continue;

    // Collect stream objects for a visited command.
    if (Cmd->getType() == Command::CommandType::RUN_CG) {
      auto ExecCmd = static_cast<ExecCGCommand *>(Cmd);

      // Transfer ownership of stream implementations.
      std::vector<std::shared_ptr<stream_impl>> Streams = ExecCmd->getStreams();
      ExecCmd->clearStreams();
      StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(),
                                 Streams.end());

      // Transfer ownership of auxiliary resources.
      std::vector<std::shared_ptr<const void>> AuxResources =
          ExecCmd->getAuxiliaryResources();
      ExecCmd->clearAuxiliaryResources();
      AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(),
                                      AuxResources.begin(), AuxResources.end());
    }

    for (Command *UserCmd : Cmd->MUsers)
      if (UserCmd->getType() != Command::CommandType::ALLOCA)
        MCmdsToVisit.push(UserCmd);

    // Delete all dependencies on any allocations being removed
    // Track which commands should have their users updated
    std::map<Command *, bool> ShouldBeUpdated;
    auto NewEnd = std::remove_if(
        Cmd->MDeps.begin(), Cmd->MDeps.end(), [&](const DepDesc &Dep) {
          if (std::find(AllocaCommands.begin(), AllocaCommands.end(),
                        Dep.MAllocaCmd) != AllocaCommands.end()) {
            ShouldBeUpdated.insert({Dep.MDepCommand, true});
            return true;
          }
          ShouldBeUpdated[Dep.MDepCommand] = false;
          return false;
        });
    Cmd->MDeps.erase(NewEnd, Cmd->MDeps.end());

    // Update users of removed dependencies
    for (auto DepCmdIt : ShouldBeUpdated) {
      if (!DepCmdIt.second)
        continue;
      DepCmdIt.first->MUsers.erase(Cmd);
    }

    // If all dependencies have been removed this way, mark the command for
    // deletion
    if (Cmd->MDeps.empty()) {
      Cmd->MUsers.clear();
      // Do not delete the node if it's scheduled for post-enqueue cleanup to
      // avoid double free.
      if (!Cmd->MPostEnqueueCleanup)
        Cmd->MMarks.MToBeDeleted = true;
    }
  }

  handleVisitedNodes(MVisitedCmds);
}

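// Detaches a single command from the graph after it has been enqueued: its
// users are re-linked to the corresponding alloca commands and the command
// object itself is deleted.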
void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd) {
  if (SYCLConfig<SYCL_DISABLE_POST_ENQUEUE_CLEANUP>::get())
    return;
  assert(Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued());
  Command::CommandType CmdT = Cmd->getType();

  assert(CmdT != Command::ALLOCA && CmdT != Command::ALLOCA_SUB_BUF);
  assert(CmdT != Command::RELEASE);
  assert(CmdT != Command::RUN_CG ||
         (static_cast<ExecCGCommand *>(Cmd))->getCG().getType() !=
             CG::CGTYPE::CodeplayHostTask);
#ifndef NDEBUG
  if (CmdT == Command::RUN_CG) {
    auto *ExecCGCmd = static_cast<ExecCGCommand *>(Cmd);
    if (ExecCGCmd->getCG().getType() == CG::CGTYPE::Kernel) {
      auto *ExecKernelCG = static_cast<CGExecKernel *>(&ExecCGCmd->getCG());
      assert(!ExecKernelCG->hasStreams());
      assert(!ExecKernelCG->hasAuxiliaryResources());
    }
  }
#endif
  (void)CmdT;

  for (Command *UserCmd : Cmd->MUsers) {
    for (DepDesc &Dep : UserCmd->MDeps) {
      // Link the users of the command to the alloca command(s) instead
      if (Dep.MDepCommand == Cmd) {
        // ... unless the user is the alloca itself.
        if (Dep.MAllocaCmd == UserCmd) {
          Dep.MDepCommand = nullptr;
        } else {
          Dep.MDepCommand = Dep.MAllocaCmd;
          Dep.MDepCommand->MUsers.insert(UserCmd);
        }
      }
    }
  }
  // Update dependency users
  for (DepDesc &Dep : Cmd->MDeps) {
    Command *DepCmd = Dep.MDepCommand;
    DepCmd->MUsers.erase(Cmd);
  }

  Cmd->getEvent()->setCommand(nullptr);
  delete Cmd;
}

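// Walks the graph from FinishedCmd towards its dependencies (BFS over MDeps),
// detaching and deleting commands that are no longer leaves for any memory
// object, while transferring ownership of their streams and auxiliary
// resources to the caller.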
void Scheduler::GraphBuilder::cleanupFinishedCommands(
    Command *FinishedCmd,
    std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
    std::vector<std::shared_ptr<const void>> &AuxResourcesToDeallocate) {
  assert(MCmdsToVisit.empty());
  MCmdsToVisit.push(FinishedCmd);
  MVisitedCmds.clear();

  // Traverse the graph using BFS
  while (!MCmdsToVisit.empty()) {
    Command *Cmd = MCmdsToVisit.front();
    MCmdsToVisit.pop();

    if (!markNodeAsVisited(Cmd, MVisitedCmds))
      continue;

    // Collect stream objects for a visited command.
    if (Cmd->getType() == Command::CommandType::RUN_CG) {
      auto ExecCmd = static_cast<ExecCGCommand *>(Cmd);

      // Transfer ownership of stream implementations.
      std::vector<std::shared_ptr<stream_impl>> Streams = ExecCmd->getStreams();
      ExecCmd->clearStreams();
      StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(),
                                 Streams.end());

      // Transfer ownership of auxiliary resources.
      std::vector<std::shared_ptr<const void>> AuxResources =
          ExecCmd->getAuxiliaryResources();
      ExecCmd->clearAuxiliaryResources();
      AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(),
                                      AuxResources.begin(), AuxResources.end());
    }

    for (const DepDesc &Dep : Cmd->MDeps) {
      if (Dep.MDepCommand)
        MCmdsToVisit.push(Dep.MDepCommand);
    }

    // Do not clean up the node if it is a leaf for any memory object
    if (Cmd->MLeafCounter > 0)
      continue;
    // Do not clean up allocation commands
    Command::CommandType CmdT = Cmd->getType();
    if (CmdT == Command::ALLOCA || CmdT == Command::ALLOCA_SUB_BUF)
      continue;

    for (Command *UserCmd : Cmd->MUsers) {
      for (DepDesc &Dep : UserCmd->MDeps) {
        // Link the users of the command to the alloca command(s) instead
        if (Dep.MDepCommand == Cmd) {
          Dep.MDepCommand = Dep.MAllocaCmd;
          Dep.MDepCommand->MUsers.insert(UserCmd);
        }
      }
    }
    // Update dependency users
    for (DepDesc &Dep : Cmd->MDeps) {
      Command *DepCmd = Dep.MDepCommand;
      DepCmd->MUsers.erase(Cmd);
    }

    // Isolate the node instead of deleting it if it's scheduled for
    // post-enqueue cleanup to avoid double free.
    if (Cmd->MPostEnqueueCleanup) {
      Cmd->MDeps.clear();
      Cmd->MUsers.clear();
    } else {
      Cmd->MMarks.MToBeDeleted = true;
    }
  }
  handleVisitedNodes(MVisitedCmds);
}

void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
  const auto It = std::find_if(
      MMemObjs.begin(), MMemObjs.end(),
      [MemObject](const SYCLMemObjI *Obj) { return Obj == MemObject; });
  if (It != MMemObjs.end())
    MMemObjs.erase(It);
  MemObject->MRecord.reset();
}

// Make Cmd depend on DepEvent from a different context. The connection is
// performed via a distinct ConnectCmd with a host task command group on the
// host queue. Cmd will depend on ConnectCmd's host event.
// DepEvent may not have a command associated with it in at least two cases:
//  - the command was deleted during the cleanup process;
//  - DepEvent is a user event.
// In both of these cases the only thing we can do is to make ConnectCmd
// depend on DepEvent.
// Otherwise, when there is a command associated with DepEvent, we make
// ConnectCmd depend on this command. If there is a valid, i.e. non-null,
// requirement in Dep, we make ConnectCmd depend on DepEvent's command with
// this requirement.
// Optionality of Dep is expressed by Dep.MDepCommand being equal to nullptr.
Command *Scheduler::GraphBuilder::connectDepEvent(
    Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep,
    std::vector<Command *> &ToCleanUp) {
  assert(Cmd->getWorkerContext() != DepEvent->getContextImpl());

  // Construct a host-task-type command manually and make it depend on DepEvent
  ExecCGCommand *ConnectCmd = nullptr;

  try {
    std::unique_ptr<detail::HostTask> HT(new detail::HostTask);
    std::unique_ptr<detail::CG> ConnectCG(new detail::CGHostTask(
        std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {},
        /* ArgsStorage = */ {}, /* AccStorage = */ {},
        /* SharedPtrStorage = */ {}, /* Requirements = */ {},
        /* DepEvents = */ {DepEvent}, CG::CodeplayHostTask,
        /* Payload */ {}));
    ConnectCmd = new ExecCGCommand(
        std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue());
  } catch (const std::bad_alloc &) {
    throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);
  }

  EmptyCommand *EmptyCmd = nullptr;

  if (Dep.MDepRequirement) {
    // Make ConnectCmd depend on the requirement.
    // Dismiss the result here as it is not a connection now, because
    // ConnectCmd is a host command.
    (void)ConnectCmd->addDep(Dep, ToCleanUp);
    assert(reinterpret_cast<Command *>(DepEvent->getCommand()) ==
           Dep.MDepCommand);
    // Adding the user to Dep.MDepCommand is already performed outside this
    // if branch.

    // ConnectCmd is added as a dependency to Cmd.
    // We build the following structure: Cmd->EmptyCmd/ConnectCmd->DepCmd.
    // There is no need to add ConnectCmd to the leaves buffer since it is a
    // dependency of command Cmd, which will be added there.

    std::vector<Command *> ToEnqueue;
    const std::vector<const Requirement *> Reqs(1, Dep.MDepRequirement);
    EmptyCmd = addEmptyCmd(ConnectCmd, Reqs,
                           Scheduler::getInstance().getDefaultHostQueue(),
                           Command::BlockReason::HostTask, ToEnqueue, false);
    assert(ToEnqueue.size() == 0);

    // Make Cmd depend on the empty command
    {
      DepDesc CmdDep = Dep;
      CmdDep.MDepCommand = EmptyCmd;

      // Dismiss the result here as it is not a connection now, because
      // EmptyCmd is a host command.
      (void)Cmd->addDep(CmdDep, ToCleanUp);
    }
  } else {
    // In the other path (with a requirement) addUser is invoked from within
    // addDep; here it has to be done manually.
    if (Command *DepCmd = reinterpret_cast<Command *>(DepEvent->getCommand()))
      DepCmd->addUser(ConnectCmd);

    std::vector<Command *> ToEnqueue;
    EmptyCmd = addEmptyCmd<Requirement>(
        ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(),
        Command::BlockReason::HostTask, ToEnqueue);
    assert(ToEnqueue.size() == 0);

    // There is no requirement, thus the empty command will only depend on
    // ConnectCmd via its event.
    // Dismiss the result here as it is not a connection now, because
    // ConnectCmd is a host command.
    (void)EmptyCmd->addDep(ConnectCmd->getEvent(), ToCleanUp);
    (void)ConnectCmd->addDep(DepEvent, ToCleanUp);

    // Make Cmd depend on the empty command.
    // Dismiss the result here as it is not a connection now, because
    // EmptyCmd is a host command.
    (void)Cmd->addDep(EmptyCmd->getEvent(), ToCleanUp);
    // In the requirement path this is added by addDep.
    EmptyCmd->addUser(Cmd);
  }

  ConnectCmd->MEmptyCmd = EmptyCmd;

  return ConnectCmd;
}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::AllocaCommandBase::getReleaseCmd
ReleaseCommand * getReleaseCmd()
Definition: commands.hpp:369
cl::sycl::detail::AccessorImplHost::MDims
unsigned int MDims
Definition: accessor_impl.hpp:114
cl::sycl::detail::UnMapMemObject
The unmap command removes mapping of host memory onto device memory.
Definition: commands.hpp:462
event_impl.hpp
cl::sycl::detail::handleVisitedNodes
static void handleVisitedNodes(std::vector< Command * > &Visited)
Definition: graph_builder.cpp:128
cl::sycl::detail::ExecCGCommand
The exec CG command enqueues execution of kernel or explicit memory operation.
Definition: commands.hpp:539
cl::sycl::detail::Command::MUsers
std::unordered_set< Command * > MUsers
Contains list of commands that depend on the command.
Definition: commands.hpp:258
cl::sycl::detail::MemObjRecord::MCurContext
ContextImplPtr MCurContext
Definition: scheduler.hpp:209
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:32
cl::sycl::detail::AllocaCommandBase::MIsActive
bool MIsActive
Indicates that current alloca is active one.
Definition: commands.hpp:392
cl::sycl::detail::markNodeAsVisited
static bool markNodeAsVisited(Command *Cmd, std::vector< Command * > &Visited)
Definition: graph_builder.cpp:114
context_impl.hpp
T
cl::sycl::detail::Command::MPostEnqueueCleanup
bool MPostEnqueueCleanup
Indicates that the node will be freed by cleanup after enqueue.
Definition: commands.hpp:318
cl::sycl::detail::CGExecKernel
"Execute kernel" command group class.
Definition: cg.hpp:241
cl::sycl::detail::Command::getQueue
const QueueImplPtr & getQueue() const
Definition: commands.hpp:149
cl::sycl::detail::HostTask
Definition: cg_types.hpp:229
config.hpp
cl::sycl::detail::ExecCGCommand::MEmptyCmd
EmptyCommand * MEmptyCmd
Definition: commands.hpp:558
cl::sycl::detail::MemObjRecord
Memory Object Record.
Definition: scheduler.hpp:193
cl::sycl::detail::unmarkVisitedNodes
static void unmarkVisitedNodes(std::vector< Command * > &Visited)
Definition: graph_builder.cpp:123
cl::sycl::detail::MemCpyCommandHost::getRequirement
const Requirement * getRequirement() const final
Definition: commands.hpp:514
cl::sycl::detail::LeavesCollection::push_back
bool push_back(value_type Cmd, EnqueueListT &ToEnqueue)
Returns true if insertion took place. Returns false otherwise.
Definition: leaves_collection.cpp:52
cl::sycl::detail::Command::MDeps
std::vector< DepDesc > MDeps
Contains list of dependencies(edges)
Definition: commands.hpp:256
cl::sycl::detail::SYCLConfig
Definition: config.hpp:105
cl::sycl::detail::MapMemObject
The map command enqueues mapping of device memory onto host memory.
Definition: commands.hpp:443
cl::sycl::detail::AccessorImplHost::MData
void * MData
Definition: accessor_impl.hpp:119
cl::sycl::detail::Command::CommandType
CommandType
Definition: commands.hpp:97
cl::sycl::detail::isAccessModeAllowed
static bool isAccessModeAllowed(access::mode Required, access::mode Current)
Checks if the required access mode is allowed under the current one.
Definition: graph_builder.cpp:56
cl::sycl::detail::Command::isSuccessfullyEnqueued
bool isSuccessfullyEnqueued() const
Definition: commands.hpp:141
cl::sycl::detail::MemObjRecord::MWriteLeaves
LeavesCollection MWriteLeaves
Definition: scheduler.hpp:206
cl::sycl::detail::isInteropHostTask
static bool isInteropHostTask(const std::unique_ptr< ExecCGCommand > &Cmd)
Definition: graph_builder.cpp:885
cl::sycl::detail::combineAccessModesOfReqs
static void combineAccessModesOfReqs(std::vector< Requirement * > &Reqs)
Definition: graph_builder.cpp:895
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::LeavesCollection::toVector
std::vector< value_type > toVector() const
Definition: leaves_collection.cpp:64
cl::sycl::detail::CGHostTask::MHostTask
std::unique_ptr< HostTask > MHostTask
Definition: cg.hpp:484
access.hpp
queue_impl.hpp
cl::sycl::detail::MemObjRecord::MHostAccess
access::mode MHostAccess
Definition: scheduler.hpp:213
scheduler.hpp
cl::sycl::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:110
cl::sycl::range< 3 >
cl::sycl::detail::insertMapUnmapForLinkedCmds
static Command * insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, AllocaCommandBase *AllocaCmdDst, access::mode MapMode)
Definition: graph_builder.cpp:295
cl::sycl::detail::Requirement
AccessorImplHost Requirement
Definition: accessor_impl.hpp:218
cl::sycl::detail::LeavesCollection::remove
size_t remove(value_type Cmd)
Replacement for std::remove with subsequent call to erase(newEnd, end()).
Definition: leaves_collection.cpp:38
cl::sycl::detail::SYCLMemObjI::getType
virtual MemObjType getType() const =0
cl::sycl::detail::Command::addUser
void addUser(Command *NewUser)
Definition: commands.hpp:124
cl::sycl::detail::MemObjRecord::MReadLeaves
LeavesCollection MReadLeaves
Definition: scheduler.hpp:203
cl::sycl::detail::Command::MLeafCounter
unsigned MLeafCounter
Counts the number of memory objects this command is a leaf for.
Definition: commands.hpp:262
cl::sycl::range::size
size_t size() const
Definition: range.hpp:50
cl::sycl::detail::AccessorImplHost::MElemSize
unsigned int MElemSize
Definition: accessor_impl.hpp:115
cl::sycl::detail::MemObjRecord::MMemModified
bool MMemModified
Definition: scheduler.hpp:217
cl::sycl::detail::CG::CGTYPE
CGTYPE
Type of the command group.
Definition: cg.hpp:156
cl::sycl::detail::LeavesCollection::EnqueueListT
std::vector< Command * > EnqueueListT
Definition: leaves_collection.hpp:42
cl::sycl::detail::LeavesCollection::AllocateDependencyF
std::function< void(Command *, Command *, MemObjRecord *, EnqueueListT &)> AllocateDependencyF
Definition: leaves_collection.hpp:46
cl::sycl::detail::AccessorImplHost
Definition: accessor_impl.hpp:74
cl::sycl::detail::ExecCGCommand::getStreams
std::vector< StreamImplPtr > getStreams() const
Definition: commands.cpp:1410
cl::sycl::detail::CGHostTask::MQueue
std::shared_ptr< detail::queue_impl > MQueue
Definition: cg.hpp:486
cl::sycl::detail::sameCtx
static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS)
Definition: graph_builder.cpp:44
cl::sycl::detail::CGUpdateHost
"Update host" command group class.
Definition: cg.hpp:348
cl::sycl::detail::DepDesc::MDepRequirement
const Requirement * MDepRequirement
Requirement for the dependency.
Definition: commands.hpp:82
cl::sycl::detail::AccessorImplHost::MOffsetInBytes
unsigned int MOffsetInBytes
Definition: accessor_impl.hpp:116
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:35
cl::sycl::detail::DepDesc
Dependency between two commands.
Definition: commands.hpp:69
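Judging from the fields indexed on this page (MDepCommand, MDepRequirement, MAllocaCmd), a dependency descriptor is essentially an edge record in the command DAG. A hypothetical minimal analogue, purely for illustration:

// Sketch only: all type names are forward-declared stand-ins.
struct Command;           // graph node being depended on
struct Requirement;       // memory access that created the edge
struct AllocaCommandBase; // allocation backing the accessed object

struct DepDescSketch {
  Command *DepCommand = nullptr;               // the actual dependency command
  const Requirement *DepRequirement = nullptr; // requirement for the dependency
  AllocaCommandBase *AllocaCmd = nullptr;      // allocation command for the object
};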
cl::sycl::detail::AllocaSubBufCommand
The AllocaSubBuf command enqueues the creation of a sub-buffer of a memory object.
Definition: commands.hpp:424
cl::sycl::detail::Command::getType
CommandType getType() const
Definition: commands.hpp:127
cl::sycl::detail::SYCLMemObjI::MRecord
std::shared_ptr< MemObjRecord > MRecord
Definition: sycl_mem_obj_i.hpp:74
cl::sycl::detail::AccessorImplHost::MIsSubBuffer
bool MIsSubBuffer
Definition: accessor_impl.hpp:117
cl::sycl::detail::doOverlap
static bool doOverlap(const Requirement *LHS, const Requirement *RHS)
Checks whether two requirements overlap.
Definition: graph_builder.cpp:37
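For intuition, the conventional half-open interval test for two byte ranges built from an offset, an element count, and an element size looks like the sketch below. This is the textbook predicate, not necessarily the library's exact implementation, and the names are hypothetical:

#include <cstddef>

// Ranges [Off, Off + Count * ElemSize) overlap iff each range starts
// strictly before the other one ends.
static bool byteRangesOverlap(std::size_t OffA, std::size_t CountA,
                              std::size_t ElemSizeA, std::size_t OffB,
                              std::size_t CountB, std::size_t ElemSizeB) {
  const std::size_t EndA = OffA + CountA * ElemSizeA;
  const std::size_t EndB = OffB + CountB * ElemSizeB;
  return OffA < EndB && OffB < EndA;
}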
cl::sycl::detail::EventImplPtr
std::shared_ptr< detail::event_impl > EventImplPtr
Definition: memory_manager.hpp:31
cl::sycl::detail::CGHostTask
Definition: cg.hpp:482
cl::sycl::detail::Command::MMarks
Marks MMarks
Used for marking the node during graph traversal.
Definition: commands.hpp:271
cl::sycl::detail::AllocaCommandBase
Base class for memory allocation commands.
Definition: commands.hpp:364
cl::sycl::detail::AllocaCommandBase::MMemAllocation
void * MMemAllocation
Definition: commands.hpp:383
cl::sycl::detail::Command
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:95
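The surrounding entries (MDeps, addDep, addUser, MLeafCounter) indicate that commands form a dependency DAG with both forward and reverse edges. A toy sketch of such a node, with every name hypothetical:

#include <vector>

// Toy DAG node: Deps are the commands this one waits on (edges),
// Users are the commands that wait on this one.
struct ToyCommand {
  std::vector<ToyCommand *> Deps;
  std::vector<ToyCommand *> Users;

  void addDep(ToyCommand *Dep) {
    Deps.push_back(Dep);
    Dep->Users.push_back(this); // keep the reverse edge consistent
  }
};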
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
cl::sycl::detail::SYCLMemObjI::getSize
virtual size_t getSize() const =0
cl::sycl::detail::AccessorImplHost::MSYCLMemObj
detail::SYCLMemObjI * MSYCLMemObj
Definition: accessor_impl.hpp:112
cl::sycl::detail::Command::Marks::MToBeDeleted
bool MToBeDeleted
Used for marking the node for deletion during cleanup.
Definition: commands.hpp:268
cl::sycl::detail::UpdateHostRequirementCommand
Definition: commands.hpp:574
cl::sycl::detail::queue_impl
Definition: queue_impl.hpp:54
cl::sycl::detail::QueueImplPtr
std::shared_ptr< detail::queue_impl > QueueImplPtr
Definition: memory_manager.hpp:30
cl::sycl::detail::AllocaCommandBase::getRequirement
const Requirement * getRequirement() const final
Definition: commands.hpp:375
cl::sycl::detail::AccessorImplHost::MAccessMode
access::mode MAccessMode
Definition: accessor_impl.hpp:110
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:91
cl::sycl::detail::Command::supportsPostEnqueueCleanup
virtual bool supportsPostEnqueueCleanup() const
Returns true iff this command can be freed by post-enqueue cleanup.
Definition: commands.cpp:627
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:204
exception.hpp
cl::sycl::detail::checkHostUnifiedMemory
static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx)
Definition: graph_builder.cpp:648
cl::sycl::detail::AccessorImplHost::MMemoryRange
range< 3 > MMemoryRange
Definition: accessor_impl.hpp:109
cl::sycl::detail::DepDesc::MDepCommand
Command * MDepCommand
The actual dependency command.
Definition: commands.hpp:80
PI_OUT_OF_HOST_MEMORY
@ PI_OUT_OF_HOST_MEMORY
Definition: pi.h:97
cl::sycl::detail::Command::addDep
Command * addDep(DepDesc NewDep, std::vector< Command * > &ToCleanUp)
Definition: commands.cpp:632
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:55
cl::sycl::detail::Command::getEvent
const EventImplPtr & getEvent() const
Definition: commands.hpp:153
cl::sycl::detail::MemObjRecord::MAllocaCommands
std::vector< AllocaCommandBase * > MAllocaCommands
Definition: scheduler.hpp:200
cl::sycl::detail::DepDesc::MAllocaCmd
AllocaCommandBase * MAllocaCmd
Allocation command for the memory object we have a requirement for.
Definition: commands.hpp:85
cl::sycl::detail::printDotRecursive
static void printDotRecursive(std::fstream &Stream, std::vector< Command * > &Visited, Command *Cmd)
Definition: graph_builder.cpp:138
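A self-contained sketch of the general technique: recursive DOT emission over a DAG with a visited flag that the caller clears afterwards (compare unmarkVisitedNodes above). Node, Id, and emitDotEdges are illustrative names, not the runtime's types:

#include <ostream>
#include <vector>

struct Node {
  int Id = 0;
  bool Visited = false;
  std::vector<Node *> Deps;
};

// Prints one "nA -> nB;" DOT edge per dependency, visiting each node once.
// Visited nodes are also collected so their flags can be cleared later.
static void emitDotEdges(std::ostream &OS, std::vector<Node *> &Seen, Node *N) {
  if (N->Visited)
    return;
  N->Visited = true;
  Seen.push_back(N);
  for (Node *Dep : N->Deps) {
    OS << "  n" << N->Id << " -> n" << Dep->Id << ";\n";
    emitDotEdges(OS, Seen, Dep);
  }
}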
cl::sycl::access::mode
mode
Definition: access.hpp:28
cl::sycl::detail::AccessorImplHost::MAccessRange
range< 3 > MAccessRange
Definition: accessor_impl.hpp:107
cl::sycl::detail::AccessorImplHost::MBlockedCmd
Command * MBlockedCmd
Definition: accessor_impl.hpp:121
cl::sycl::detail::LeavesCollection
A wrapper around the CircularBuffer class, together with a collection of the host accessors' EmptyCommands.
Definition: leaves_collection.hpp:38
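The class wraps a circular buffer, and push_back above reports whether insertion took place. A toy bounded leaf list in the same spirit, where pushing into a full buffer evicts the oldest element back to the caller; the simplified interface (an std::optional return instead of an enqueue list) is an assumption for illustration only:

#include <cstddef>
#include <deque>
#include <optional>

// Toy fixed-capacity leaf list, not the runtime's CircularBuffer.
template <typename T> class BoundedLeaves {
  std::deque<T> Buf;
  std::size_t Cap;

public:
  explicit BoundedLeaves(std::size_t Capacity) : Cap(Capacity) {}

  // Returns the evicted oldest element when the buffer was already full.
  std::optional<T> push_back(T Value) {
    std::optional<T> Evicted;
    if (!Buf.empty() && Buf.size() >= Cap) {
      Evicted = Buf.front();
      Buf.pop_front();
    }
    Buf.push_back(Value);
    return Evicted;
  }
};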
cl::sycl::detail::Command::getWorkerContext
virtual const ContextImplPtr & getWorkerContext() const
Get the context of the queue this command will be submitted to.
Definition: commands.cpp:619
cl::sycl::detail::IsSuitableSubReq
static bool IsSuitableSubReq(const Requirement *Req)
Checks if the current requirement is a requirement for a sub-buffer.
Definition: graph_builder.cpp:51
cl::sycl::detail::Command::printDot
virtual void printDot(std::ostream &Stream) const =0
cl::sycl::detail::AllocaCommandBase::MLinkedAllocaCmd
AllocaCommandBase * MLinkedAllocaCmd
Alloca command linked with the current command.
Definition: commands.hpp:390
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::detail::combineAccessModes
static access::mode combineAccessModes(access::mode A, access::mode B)
Combines two access modes into a single one that allows both.
Definition: graph_builder.cpp:74
cl::sycl::detail::EmptyCommand
The empty command does nothing during enqueue.
Definition: commands.hpp:323
cl::sycl::detail::Command::Marks::MVisited
bool MVisited
Used for marking the node as visited during graph traversal.
Definition: commands.hpp:266
cl::sycl::detail::SYCLMemObjI
Definition: sycl_mem_obj_i.hpp:28
cl::sycl::detail::SYCLMemObjI::getInteropContext
virtual ContextImplPtr getInteropContext() const =0
cl::sycl::detail::UpdateHostRequirementCommand::getRequirement
const Requirement * getRequirement() const final
Definition: commands.hpp:580
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12
cl::sycl::detail::MemCpyCommandHost
The mem copy host command enqueues a memory copy between two instances of a memory object.
Definition: commands.hpp:507
memory_manager.hpp