DPC++ Runtime
Runtime libraries for oneAPI DPC++
graph_builder.cpp
1 //===-- graph_builder.cpp - SYCL Graph Builder ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "detail/config.hpp"
10 #include <detail/context_impl.hpp>
11 #include <detail/event_impl.hpp>
12 #include <sycl/feature_test.hpp>
13 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
14 #include <detail/jit_compiler.hpp>
15 #endif
17 #include <detail/queue_impl.hpp>
20 #include <sycl/access/access.hpp>
21 #include <sycl/exception.hpp>
22 
23 #include <algorithm>
24 #include <cstdlib>
25 #include <cstring>
26 #include <fstream>
27 #include <map>
28 #include <memory>
29 #include <queue>
30 #include <set>
31 #include <vector>
32 
33 namespace sycl {
35 namespace detail {
36 
41 // TODO merge with LeavesCollection's version of doOverlap (see
42 // leaves_collection.cpp).
43 static bool doOverlap(const Requirement *LHS, const Requirement *RHS) {
44  return (LHS->MOffsetInBytes + LHS->MAccessRange.size() * LHS->MElemSize >=
45  RHS->MOffsetInBytes) ||
46  (RHS->MOffsetInBytes + RHS->MAccessRange.size() * RHS->MElemSize >=
47  LHS->MOffsetInBytes);
48 }
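// Note: for non-empty ranges at least one of the two comparisons above always
// holds, so this check is conservative and effectively reports any pair of
// requirements on the same memory object as overlapping (see the TODO above
// about merging with LeavesCollection's doOverlap).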
49 
50 static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) {
51  // Consider two different host contexts to be the same to avoid additional
52  // allocation on the host
53  return LHS == RHS || (LHS->is_host() && RHS->is_host());
54 }
55 
57 static bool IsSuitableSubReq(const Requirement *Req) {
58  return Req->MIsSubBuffer;
59 }
60 
62 static bool isAccessModeAllowed(access::mode Required, access::mode Current) {
63  switch (Current) {
64  case access::mode::read:
65  return (Required == Current);
66  case access::mode::write:
67  assert(false && "Write only access is expected to be mapped as read_write");
68  return (Required == Current || Required == access::mode::discard_write);
69  case access::mode::read_write:
70  case access::mode::atomic:
71  case access::mode::discard_write:
72  case access::mode::discard_read_write:
73  return true;
74  }
75  assert(false);
76  return false;
77 }
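// For illustration: a mapping created for Current == read can only serve
// Required == read; read_write, atomic and discard_* mappings can serve any
// requested mode; write-only mappings are expected to have been promoted to
// read_write already (see the assert above).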
78 
80 static access::mode combineAccessModes(access::mode A, access::mode B) {
81  if (A == B)
82  return A;
83 
84  if (A == access::mode::discard_write &&
85  (B == access::mode::discard_read_write || B == access::mode::write))
86  return B;
87 
88  if (B == access::mode::discard_write &&
89  (A == access::mode::discard_read_write || A == access::mode::write))
90  return A;
91 
92  return access::mode::read_write;
93 }
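// For example: combineAccessModes(read, write) falls through to the final
// return and yields read_write, while combineAccessModes(discard_write, write)
// yields write, which covers both requests.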
94 
95 Scheduler::GraphBuilder::GraphBuilder() {
96  if (const char *EnvVarCStr = SYCLConfig<SYCL_PRINT_EXECUTION_GRAPH>::get()) {
97  std::string GraphPrintOpts(EnvVarCStr);
98  bool EnableAlways = GraphPrintOpts.find("always") != std::string::npos;
99 
100  if (GraphPrintOpts.find("before_addCG") != std::string::npos ||
101  EnableAlways)
102  MPrintOptionsArray[BeforeAddCG] = true;
103  if (GraphPrintOpts.find("after_addCG") != std::string::npos || EnableAlways)
104  MPrintOptionsArray[AfterAddCG] = true;
105  if (GraphPrintOpts.find("before_addCopyBack") != std::string::npos ||
106  EnableAlways)
107  MPrintOptionsArray[BeforeAddCopyBack] = true;
108  if (GraphPrintOpts.find("after_addCopyBack") != std::string::npos ||
109  EnableAlways)
110  MPrintOptionsArray[AfterAddCopyBack] = true;
111  if (GraphPrintOpts.find("before_addHostAcc") != std::string::npos ||
112  EnableAlways)
113  MPrintOptionsArray[BeforeAddHostAcc] = true;
114  if (GraphPrintOpts.find("after_addHostAcc") != std::string::npos ||
115  EnableAlways)
116  MPrintOptionsArray[AfterAddHostAcc] = true;
117  if (GraphPrintOpts.find("after_fusionComplete") != std::string::npos ||
118  EnableAlways)
119  MPrintOptionsArray[AfterFusionComplete] = true;
120  if (GraphPrintOpts.find("after_fusionCancel") != std::string::npos ||
121  EnableAlways)
122  MPrintOptionsArray[AfterFusionCancel] = true;
123  }
124 }
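// The options above are matched as plain substrings, so several can be
// combined in one value, e.g. (illustrative):
//   SYCL_PRINT_EXECUTION_GRAPH=before_addCG,after_addCG ./app
// or simply SYCL_PRINT_EXECUTION_GRAPH=always to enable every dump point.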
125 
126 static bool markNodeAsVisited(Command *Cmd, std::vector<Command *> &Visited) {
127  assert(Cmd && "Cmd can't be nullptr");
128  if (Cmd->MMarks.MVisited)
129  return false;
130  Cmd->MMarks.MVisited = true;
131  Visited.push_back(Cmd);
132  return true;
133 }
134 
135 static void unmarkVisitedNodes(std::vector<Command *> &Visited) {
136  for (Command *Cmd : Visited)
137  Cmd->MMarks.MVisited = false;
138 }
139 
140 static void handleVisitedNodes(std::vector<Command *> &Visited) {
141  for (Command *Cmd : Visited) {
142  if (Cmd->MMarks.MToBeDeleted) {
143  if (Cmd->getType() == Command::FUSION &&
144  !static_cast<KernelFusionCommand *>(Cmd)->readyForDeletion()) {
145  // Fusion commands might still be needed because fusion might be
146  // aborted, but a later call to complete_fusion still needs to be able
147  // to return a valid event. Clean-up of fusion commands is therefore
148  // explicitly handled by start fusion.
149  return;
150  }
151  Cmd->getEvent()->setCommand(nullptr);
152  delete Cmd;
153  } else
154  Cmd->MMarks.MVisited = false;
155  }
156 }
157 
158 static void printDotRecursive(std::fstream &Stream,
159  std::vector<Command *> &Visited, Command *Cmd) {
160  if (!markNodeAsVisited(Cmd, Visited))
161  return;
162  for (Command *User : Cmd->MUsers) {
163  if (User)
164  printDotRecursive(Stream, Visited, User);
165  }
166  Cmd->printDot(Stream);
167 }
168 
169 void Scheduler::GraphBuilder::printGraphAsDot(const char *ModeName) {
170  static size_t Counter = 0;
171  std::string ModeNameStr(ModeName);
172  std::string FileName =
173  "graph_" + std::to_string(Counter) + ModeNameStr + ".dot";
174 
175  Counter++;
176 
177  std::fstream Stream(FileName, std::ios::out);
178  Stream << "strict digraph {" << std::endl;
179 
180  MVisitedCmds.clear();
181 
182  for (SYCLMemObjI *MemObject : MMemObjs)
183  for (Command *AllocaCmd : MemObject->MRecord->MAllocaCommands)
184  printDotRecursive(Stream, MVisitedCmds, AllocaCmd);
185 
186  Stream << "}" << std::endl;
187 
188  unmarkVisitedNodes(MVisitedCmds);
189 }
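// The emitted files (graph_<N><mode>.dot, e.g. graph_0before_addCG.dot) are
// plain Graphviz input and can be rendered with, for instance:
//   dot -Tpng graph_0before_addCG.dot -o graph_0before_addCG.png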
190 
191 MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) {
192  return MemObject->MRecord.get();
193 }
194 
195 MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
196  const QueueImplPtr &Queue, const Requirement *Req,
197  std::vector<Command *> &ToEnqueue) {
198  SYCLMemObjI *MemObject = Req->MSYCLMemObj;
199  MemObjRecord *Record = getMemObjRecord(MemObject);
200 
201  if (nullptr != Record)
202  return Record;
203 
204  const size_t LeafLimit = 8;
205  LeavesCollection::AllocateDependencyF AllocateDependency =
206  [this](Command *Dependant, Command *Dependency, MemObjRecord *Record,
207  LeavesCollection::EnqueueListT &ToEnqueue) {
208  // Add the old leaf as a dependency for the new one by duplicating one
209  // of the requirements for the current record
210  DepDesc Dep = findDepForRecord(Dependant, Record);
211  Dep.MDepCommand = Dependency;
212  std::vector<Command *> ToCleanUp;
213  Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp);
214  if (ConnectionCmd)
215  ToEnqueue.push_back(ConnectionCmd);
216 
217  --(Dependency->MLeafCounter);
218  if (Dependency->readyForCleanup())
219  ToCleanUp.push_back(Dependency);
220  for (Command *Cmd : ToCleanUp)
221  cleanupCommand(Cmd);
222  };
223 
224  const ContextImplPtr &InteropCtxPtr = Req->MSYCLMemObj->getInteropContext();
225  if (InteropCtxPtr) {
226  // The memory object has been constructed using an interoperability
227  // constructor, which means that there is already an allocation (cl_mem) in
228  // some context. Register this allocation in the SYCL graph.
229 
230  std::vector<sycl::device> Devices =
231  InteropCtxPtr->get_info<info::context::devices>();
232  assert(Devices.size() != 0);
233  DeviceImplPtr Dev = detail::getSyclObjImpl(Devices[0]);
234 
235  // Since all the Scheduler commands require a queue, but we only have a
236  // context here, we need to create a dummy queue bound to the context and
237  // one of the devices from the context.
238  QueueImplPtr InteropQueuePtr{new detail::queue_impl{
239  Dev, InteropCtxPtr, /*AsyncHandler=*/{}, /*PropertyList=*/{}}};
240 
241  MemObject->MRecord.reset(
242  new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
243  getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
244  ToEnqueue);
245  } else
246  MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(),
247  LeafLimit, AllocateDependency});
248 
249  MMemObjs.push_back(MemObject);
250  return MemObject->MRecord.get();
251 }
252 
253 void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
254  MemObjRecord *Record,
255  access::mode AccessMode,
256  std::vector<Command *> &ToCleanUp) {
257 
258  const bool ReadOnlyReq = AccessMode == access::mode::read;
259  if (ReadOnlyReq)
260  return;
261 
262  for (Command *Cmd : Cmds) {
263  bool WasLeaf = Cmd->MLeafCounter > 0;
264  Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
265  Cmd->MLeafCounter -= Record->MWriteLeaves.remove(Cmd);
266  if (WasLeaf && Cmd->readyForCleanup()) {
267  ToCleanUp.push_back(Cmd);
268  }
269  }
270 }
271 
272 void Scheduler::GraphBuilder::addNodeToLeaves(
273  MemObjRecord *Record, Command *Cmd, access::mode AccessMode,
274  std::vector<Command *> &ToEnqueue) {
275  LeavesCollection &Leaves{AccessMode == access::mode::read
276  ? Record->MReadLeaves
277  : Record->MWriteLeaves};
278  if (Leaves.push_back(Cmd, ToEnqueue))
279  ++Cmd->MLeafCounter;
280 }
281 
282 UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
283  MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
284  std::vector<Command *> &ToEnqueue) {
285  AllocaCommandBase *AllocaCmd =
286  findAllocaForReq(Record, Req, Queue->getContextImplPtr());
287  assert(AllocaCmd && "There must be alloca for requirement!");
288  UpdateHostRequirementCommand *UpdateCommand =
289  new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData);
290  // A copy of the requirement is needed: dependencies become invalid after the
291  // host accessor's destructor runs if the requirement is stored by pointer.
292  const Requirement *StoredReq = UpdateCommand->getRequirement();
293 
294  std::set<Command *> Deps =
295  findDepsForReq(Record, Req, Queue->getContextImplPtr());
296  std::vector<Command *> ToCleanUp;
297  for (Command *Dep : Deps) {
298  Command *ConnCmd =
299  UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd}, ToCleanUp);
300  if (ConnCmd)
301  ToEnqueue.push_back(ConnCmd);
302  }
303  updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
304  addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue);
305  for (Command *Cmd : ToCleanUp)
306  cleanupCommand(Cmd);
307  return UpdateCommand;
308 }
309 
310 // Takes linked alloca commands. Makes AllocaCmdDst command active using map
311 // or unmap operation.
312 static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
313  AllocaCommandBase *AllocaCmdDst,
314  access::mode MapMode) {
315  assert(AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst &&
316  "Expected linked alloca commands");
317  assert(AllocaCmdSrc->MIsActive &&
318  "Expected source alloca command to be active");
319 
320  if (AllocaCmdSrc->getQueue()->is_host()) {
321  UnMapMemObject *UnMapCmd = new UnMapMemObject(
322  AllocaCmdDst, *AllocaCmdDst->getRequirement(),
323  &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getQueue());
324 
325  std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive);
326 
327  return UnMapCmd;
328  }
329 
330  MapMemObject *MapCmd = new MapMemObject(
331  AllocaCmdSrc, *AllocaCmdSrc->getRequirement(),
332  &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getQueue(), MapMode);
333 
334  std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive);
335 
336  return MapCmd;
337 }
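// In other words: when the currently active (source) allocation is the host
// one, an unmap command makes the linked device allocation active; otherwise a
// map command with MapMode makes the linked host allocation active. In both
// cases the MIsActive flags of the two linked allocations are swapped.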
338 
339 Command *Scheduler::GraphBuilder::insertMemoryMove(
340  MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
341  std::vector<Command *> &ToEnqueue) {
342 
343  AllocaCommandBase *AllocaCmdDst =
344  getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
345  if (!AllocaCmdDst)
346  throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY);
347 
348  std::set<Command *> Deps =
349  findDepsForReq(Record, Req, Queue->getContextImplPtr());
350  Deps.insert(AllocaCmdDst);
351  // Get parent allocation of sub buffer to perform full copy of whole buffer
352  if (IsSuitableSubReq(Req)) {
353  if (AllocaCmdDst->getType() == Command::CommandType::ALLOCA_SUB_BUF)
354  AllocaCmdDst =
355  static_cast<AllocaSubBufCommand *>(AllocaCmdDst)->getParentAlloca();
356  }
357 
358  AllocaCommandBase *AllocaCmdSrc =
359  findAllocaForReq(Record, Req, Record->MCurContext);
360  if (!AllocaCmdSrc && IsSuitableSubReq(Req)) {
361  // Since no alloca command for the sub buffer requirement was found in the
362  // current context, need to find a parent alloca command for it (it must be
363  // there)
364  auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) {
365  bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(),
366  Record->MCurContext) &&
367  // Looking for a parent buffer alloca command
368  AllocaCmd->getType() == Command::CommandType::ALLOCA;
369  return Res;
370  };
371  const auto It =
372  std::find_if(Record->MAllocaCommands.begin(),
373  Record->MAllocaCommands.end(), IsSuitableAlloca);
374  AllocaCmdSrc = (Record->MAllocaCommands.end() != It) ? *It : nullptr;
375  }
376  if (!AllocaCmdSrc)
377  throw runtime_error("Cannot find buffer allocation",
378  PI_ERROR_INVALID_VALUE);
379  // Get parent allocation of sub buffer to perform full copy of whole buffer
380  if (IsSuitableSubReq(Req)) {
381  if (AllocaCmdSrc->getType() == Command::CommandType::ALLOCA_SUB_BUF)
382  AllocaCmdSrc =
383  static_cast<AllocaSubBufCommand *>(AllocaCmdSrc)->getParentAlloca();
384  else if (AllocaCmdSrc->getSYCLMemObj() != Req->MSYCLMemObj)
385  assert(false && "Inappropriate alloca command.");
386  }
387 
388  Command *NewCmd = nullptr;
389 
390  if (AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst) {
391  // Map write only as read-write
392  access::mode MapMode = Req->MAccessMode;
393  if (MapMode == access::mode::write)
394  MapMode = access::mode::read_write;
395  NewCmd = insertMapUnmapForLinkedCmds(AllocaCmdSrc, AllocaCmdDst, MapMode);
396  Record->MHostAccess = MapMode;
397  } else {
398 
399  if ((Req->MAccessMode == access::mode::discard_write) ||
400  (Req->MAccessMode == access::mode::discard_read_write)) {
401  Record->MCurContext = Queue->getContextImplPtr();
402  return nullptr;
403  } else {
404  // A full copy of the buffer is needed to avoid the data loss that copying
405  // only a specific range between host and device could cause.
406  NewCmd =
407  new MemCpyCommand(*AllocaCmdSrc->getRequirement(), AllocaCmdSrc,
408  *AllocaCmdDst->getRequirement(), AllocaCmdDst,
409  AllocaCmdSrc->getQueue(), AllocaCmdDst->getQueue());
410  }
411  }
412  std::vector<Command *> ToCleanUp;
413  for (Command *Dep : Deps) {
414  Command *ConnCmd = NewCmd->addDep(
415  DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst}, ToCleanUp);
416  if (ConnCmd)
417  ToEnqueue.push_back(ConnCmd);
418  }
419  updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
420  addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue);
421  for (Command *Cmd : ToCleanUp)
422  cleanupCommand(Cmd);
423  Record->MCurContext = Queue->getContextImplPtr();
424  return NewCmd;
425 }
426 
427 Command *Scheduler::GraphBuilder::remapMemoryObject(
428  MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd,
429  std::vector<Command *> &ToEnqueue) {
430  assert(HostAllocaCmd->getQueue()->is_host() &&
431  "Host alloca command expected");
432  assert(HostAllocaCmd->MIsActive && "Active alloca command expected");
433 
434  AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd;
435  assert(LinkedAllocaCmd && "Linked alloca command expected");
436 
437  std::set<Command *> Deps = findDepsForReq(Record, Req, Record->MCurContext);
438 
439  UnMapMemObject *UnMapCmd = new UnMapMemObject(
440  LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
441  &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue());
442 
443  // Map write only as read-write
444  access::mode MapMode = Req->MAccessMode;
445  if (MapMode == access::mode::write)
446  MapMode = access::mode::read_write;
447  MapMemObject *MapCmd = new MapMemObject(
448  LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
449  &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode);
450 
451  std::vector<Command *> ToCleanUp;
452  for (Command *Dep : Deps) {
453  Command *ConnCmd = UnMapCmd->addDep(
454  DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd}, ToCleanUp);
455  if (ConnCmd)
456  ToEnqueue.push_back(ConnCmd);
457  }
458 
459  Command *ConnCmd = MapCmd->addDep(
460  DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd}, ToCleanUp);
461  if (ConnCmd)
462  ToEnqueue.push_back(ConnCmd);
463 
464  updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
465  addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue);
466  for (Command *Cmd : ToCleanUp)
467  cleanupCommand(Cmd);
468  Record->MHostAccess = MapMode;
469  return MapCmd;
470 }
471 
472 // The function adds a copy operation that copies the most up-to-date memory
473 // to the memory pointed to by Req.
474 Command *
475 Scheduler::GraphBuilder::addCopyBack(Requirement *Req,
476  std::vector<Command *> &ToEnqueue) {
477  QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue();
478  SYCLMemObjI *MemObj = Req->MSYCLMemObj;
479  MemObjRecord *Record = getMemObjRecord(MemObj);
480  if (Record && MPrintOptionsArray[BeforeAddCopyBack])
481  printGraphAsDot("before_addCopyBack");
482 
483  // Do nothing if there were no operations on the memory object, or only read operations.
484  if (nullptr == Record || !Record->MMemModified)
485  return nullptr;
486 
487  std::set<Command *> Deps =
488  findDepsForReq(Record, Req, HostQueue->getContextImplPtr());
489  AllocaCommandBase *SrcAllocaCmd =
490  findAllocaForReq(Record, Req, Record->MCurContext);
491 
492  auto MemCpyCmdUniquePtr = std::make_unique<MemCpyCommandHost>(
493  *SrcAllocaCmd->getRequirement(), SrcAllocaCmd, *Req, &Req->MData,
494  SrcAllocaCmd->getQueue(), std::move(HostQueue));
495 
496  if (!MemCpyCmdUniquePtr)
497  throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY);
498 
499  MemCpyCommandHost *MemCpyCmd = MemCpyCmdUniquePtr.release();
500 
501  std::vector<Command *> ToCleanUp;
502  for (Command *Dep : Deps) {
503  Command *ConnCmd = MemCpyCmd->addDep(
504  DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp);
505  if (ConnCmd)
506  ToEnqueue.push_back(ConnCmd);
507  }
508 
509  updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
510  addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode, ToEnqueue);
511  for (Command *Cmd : ToCleanUp)
512  cleanupCommand(Cmd);
513  if (MPrintOptionsArray[AfterAddCopyBack])
514  printGraphAsDot("after_addCopyBack");
515  return MemCpyCmd;
516 }
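// Note that the copy back to user-side memory is only scheduled when the
// record exists and MMemModified is set, i.e. the memory object was actually
// written to; purely read-only usage does not trigger an extra transfer.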
517 
518 // The function implements SYCL host accessor logic: host accessor
519 // should provide access to the buffer in user space.
520 Command *
521 Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
522  std::vector<Command *> &ToEnqueue) {
523 
524  const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue();
525 
526  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
527  if (MPrintOptionsArray[BeforeAddHostAcc])
528  printGraphAsDot("before_addHostAccessor");
529  markModifiedIfWrite(Record, Req);
530 
531  AllocaCommandBase *HostAllocaCmd =
532  getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue);
533 
534  if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
535  Record->MCurContext)) {
536  if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
537  remapMemoryObject(Record, Req, HostAllocaCmd, ToEnqueue);
538  } else
539  insertMemoryMove(Record, Req, HostQueue, ToEnqueue);
540 
541  Command *UpdateHostAccCmd =
542  insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue);
543 
544  // Need empty command to be blocked until host accessor is destructed
545  EmptyCommand *EmptyCmd =
546  addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue,
547  Command::BlockReason::HostAccessor, ToEnqueue);
548 
549  Req->MBlockedCmd = EmptyCmd;
550 
551  if (MPrintOptionsArray[AfterAddHostAcc])
552  printGraphAsDot("after_addHostAccessor");
553 
554  return UpdateHostAccCmd;
555 }
556 
557 Command *Scheduler::GraphBuilder::addCGUpdateHost(
558  std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &HostQueue,
559  std::vector<Command *> &ToEnqueue) {
560 
561  auto UpdateHost = static_cast<CGUpdateHost *>(CommandGroup.get());
562  Requirement *Req = UpdateHost->getReqToUpdate();
563 
564  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
565  return insertMemoryMove(Record, Req, HostQueue, ToEnqueue);
566 }
567 
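// Finds the set of commands that a new command operating on Req must depend
// on. Starting from the leaves of the record, a command (or its predecessor)
// can be bypassed only if both accesses are read-only or the requirements do
// not overlap, and the dependency lives in the same context; otherwise it is
// recorded as a dependency.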
576 std::set<Command *>
577 Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record,
578  const Requirement *Req,
579  const ContextImplPtr &Context) {
580  std::set<Command *> RetDeps;
581  std::vector<Command *> Visited;
582  const bool ReadOnlyReq = Req->MAccessMode == access::mode::read;
583 
584  std::vector<Command *> ToAnalyze{Record->MWriteLeaves.toVector()};
585 
586  if (!ReadOnlyReq) {
587  std::vector<Command *> V{Record->MReadLeaves.toVector()};
588 
589  ToAnalyze.insert(ToAnalyze.begin(), V.begin(), V.end());
590  }
591 
592  while (!ToAnalyze.empty()) {
593  Command *DepCmd = ToAnalyze.back();
594  ToAnalyze.pop_back();
595 
596  std::vector<Command *> NewAnalyze;
597 
598  for (const DepDesc &Dep : DepCmd->MDeps) {
599  if (Dep.MDepRequirement->MSYCLMemObj != Req->MSYCLMemObj)
600  continue;
601 
602  bool CanBypassDep = false;
603  // If both only read
604  CanBypassDep |=
605  Dep.MDepRequirement->MAccessMode == access::mode::read && ReadOnlyReq;
606 
607  // If not overlap
608  CanBypassDep |= !doOverlap(Dep.MDepRequirement, Req);
609 
610  // Going through copying memory between contexts is not supported.
611  if (Dep.MDepCommand)
612  CanBypassDep &=
613  sameCtx(Context, Dep.MDepCommand->getQueue()->getContextImplPtr());
614 
615  if (!CanBypassDep) {
616  RetDeps.insert(DepCmd);
617  // No need to analyze the deps of the examined command, as it is a
618  // dependency itself.
619  NewAnalyze.clear();
620  break;
621  }
622 
623  if (markNodeAsVisited(Dep.MDepCommand, Visited))
624  NewAnalyze.push_back(Dep.MDepCommand);
625  }
626  ToAnalyze.insert(ToAnalyze.end(), NewAnalyze.begin(), NewAnalyze.end());
627  }
628  unmarkVisitedNodes(Visited);
629  return RetDeps;
630 }
631 
632 // A helper function for finding a command dependency on a specific memory
633 // object
634 DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd,
635  MemObjRecord *Record) {
636  for (const DepDesc &DD : Cmd->MDeps) {
637  if (getMemObjRecord(DD.MDepRequirement->MSYCLMemObj) == Record) {
638  return DD;
639  }
640  }
641  assert(false && "No dependency found for a leaf of the record");
642  return {nullptr, nullptr, nullptr};
643 }
644 
645 // The function searches for the alloca command matching context and
646 // requirement.
647 AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq(
648  MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context,
649  bool AllowConst) {
650  auto IsSuitableAlloca = [&Context, Req,
651  AllowConst](AllocaCommandBase *AllocaCmd) {
652  bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context);
653  if (IsSuitableSubReq(Req)) {
654  const Requirement *TmpReq = AllocaCmd->getRequirement();
655  Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF;
656  Res &= TmpReq->MOffsetInBytes == Req->MOffsetInBytes;
657  Res &= TmpReq->MSYCLMemObj->getSizeInBytes() ==
658  Req->MSYCLMemObj->getSizeInBytes();
659  Res &= AllowConst || !AllocaCmd->MIsConst;
660  }
661  return Res;
662  };
663  const auto It = std::find_if(Record->MAllocaCommands.begin(),
664  Record->MAllocaCommands.end(), IsSuitableAlloca);
665  return (Record->MAllocaCommands.end() != It) ? *It : nullptr;
666 }
667 
668 static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
669  if (const char *HUMConfig = SYCLConfig<SYCL_HOST_UNIFIED_MEMORY>::get()) {
670  if (std::strcmp(HUMConfig, "0") == 0)
671  return Ctx->is_host();
672  if (std::strcmp(HUMConfig, "1") == 0)
673  return true;
674  }
675  for (const device &Device : Ctx->getDevices()) {
676  if (!Device.get_info<info::device::host_unified_memory>())
677  return false;
678  }
679  return true;
680 }
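// The SYCL_HOST_UNIFIED_MEMORY configuration overrides the per-device query
// above: "0" makes only the host context report host unified memory, while "1"
// forces it on for every context (illustrative: SYCL_HOST_UNIFIED_MEMORY=1 ./app).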
681 
682 // The function searches for an alloca command matching the context and
683 // requirement. If none exists, a new allocation command is created.
684 // Note: creating a new allocation command can change the current context
685 // (Record->MCurContext).
686 AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
687  MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue,
688  std::vector<Command *> &ToEnqueue) {
689 
690  AllocaCommandBase *AllocaCmd = findAllocaForReq(
691  Record, Req, Queue->getContextImplPtr(), /*AllowConst=*/false);
692 
693  if (!AllocaCmd) {
694  std::vector<Command *> ToCleanUp;
695  if (IsSuitableSubReq(Req)) {
696  // Build the parent requirement. It is hard to get the parent's exact range,
697  // so the full parent requirement has its range expressed in bytes.
698  range<3> ParentRange{Req->MSYCLMemObj->getSizeInBytes(), 1, 1};
699  Requirement ParentRequirement(/*Offset*/ {0, 0, 0}, ParentRange,
700  ParentRange, access::mode::read_write,
701  Req->MSYCLMemObj, /*Dims*/ 1,
702  /*Working with bytes*/ sizeof(char));
703 
704  auto *ParentAlloca =
705  getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue);
706  AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue,
707  ToCleanUp);
708  } else {
709 
710  const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
711  Req->MMemoryRange, access::mode::read_write,
712  Req->MSYCLMemObj, Req->MDims, Req->MElemSize,
713  0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/);
714  // Can reuse user data for the first allocation. Do so if host unified
715  // memory is supported regardless of the access mode (the pointer will be
716  // reused). For devices without host unified memory the initialization
717  // will be performed as a write operation.
718  // TODO the case where the first alloca is made with a discard mode and
719  // the user pointer is read-only is still not handled: it leads to
720  // unnecessary copy on devices with unified host memory support.
721  const bool HostUnifiedMemory =
722  checkHostUnifiedMemory(Queue->getContextImplPtr());
723  // TODO casting is required here to get the necessary information
724  // without breaking ABI, replace with the next major version.
725  auto *MemObj = static_cast<SYCLMemObjT *>(Req->MSYCLMemObj);
726  const bool InitFromUserData = Record->MAllocaCommands.empty() &&
727  (HostUnifiedMemory || MemObj->isInterop());
728  AllocaCommandBase *LinkedAllocaCmd = nullptr;
729 
730  // For the first allocation on a device without host unified memory we
731  // might need to also create a host alloca right away in order to perform
732  // the initial memory write.
733  if (Record->MAllocaCommands.empty()) {
734  if (!HostUnifiedMemory &&
735  Req->MAccessMode != access::mode::discard_write &&
736  Req->MAccessMode != access::mode::discard_read_write) {
737  // There's no need to make a host allocation if the buffer is not
738  // initialized with user data.
739  if (MemObj->hasUserDataPtr()) {
740  QueueImplPtr DefaultHostQueue =
741  Scheduler::getInstance().getDefaultHostQueue();
742  AllocaCommand *HostAllocaCmd = new AllocaCommand(
743  DefaultHostQueue, FullReq, true /* InitFromUserData */,
744  nullptr /* LinkedAllocaCmd */,
745  MemObj->isHostPointerReadOnly() /* IsConst */);
746  Record->MAllocaCommands.push_back(HostAllocaCmd);
747  Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue);
748  ++(HostAllocaCmd->MLeafCounter);
749  Record->MCurContext = DefaultHostQueue->getContextImplPtr();
750  }
751  }
752  } else {
753  // If it is not the first allocation, try to set up a link.
754  // FIXME: Temporary limitation, linked alloca commands for images are
755  // not supported because the map operation is not implemented for images.
756  if (Req->MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer)
757  // The current limitation is to set up a link between the current allocation
758  // and the new one. There are situations where a link to a non-current
759  // allocation would be possible, but it would require a memory copy.
760  // A link can only be set up between a cl and a host allocation.
761  if (Queue->is_host() != Record->MCurContext->is_host()) {
762  // Linked commands assume that the host allocation is reused by the
763  // plugin runtime and that can lead to unnecessary copy overhead on
764  // devices that do not support host unified memory. Do not link the
765  // allocations in this case.
766  // However, if the user explicitly requests use of pinned host
767  // memory, map/unmap operations are expected to work faster than
768  // read/write from/to an arbitrary host pointer. Link such commands
769  // regardless of host unified memory support.
770  bool PinnedHostMemory = MemObj->has_property<
771  sycl::ext::oneapi::property::buffer::use_pinned_host_memory>();
772 
773  bool HostUnifiedMemoryOnNonHostDevice =
774  Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext)
775  : HostUnifiedMemory;
776  if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) {
777  AllocaCommandBase *LinkedAllocaCmdCand = findAllocaForReq(
778  Record, Req, Record->MCurContext, /*AllowConst=*/false);
779 
780  // Cannot setup link if candidate is linked already
781  if (LinkedAllocaCmdCand &&
782  !LinkedAllocaCmdCand->MLinkedAllocaCmd) {
783  LinkedAllocaCmd = LinkedAllocaCmdCand;
784  }
785  }
786  }
787  }
788 
789  AllocaCmd =
790  new AllocaCommand(Queue, FullReq, InitFromUserData, LinkedAllocaCmd);
791 
792  // Update linked command
793  if (LinkedAllocaCmd) {
794  Command *ConnCmd = AllocaCmd->addDep(
795  DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(),
796  LinkedAllocaCmd},
797  ToCleanUp);
798  if (ConnCmd)
799  ToEnqueue.push_back(ConnCmd);
800  LinkedAllocaCmd->MLinkedAllocaCmd = AllocaCmd;
801 
802  // To ensure that the leader allocation is removed first
803  ConnCmd = AllocaCmd->getReleaseCmd()->addDep(
804  DepDesc(LinkedAllocaCmd->getReleaseCmd(),
805  AllocaCmd->getRequirement(), LinkedAllocaCmd),
806  ToCleanUp);
807  if (ConnCmd)
808  ToEnqueue.push_back(ConnCmd);
809 
810  // Device allocation takes ownership of the host ptr during
811  // construction, host allocation doesn't. So, device allocation should
812  // always be active here. Also if the "follower" command is a device one
813  // we have to change current context to the device one.
814  if (Queue->is_host()) {
815  AllocaCmd->MIsActive = false;
816  } else {
817  LinkedAllocaCmd->MIsActive = false;
818  Record->MCurContext = Queue->getContextImplPtr();
819 
820  std::set<Command *> Deps =
821  findDepsForReq(Record, Req, Queue->getContextImplPtr());
822  for (Command *Dep : Deps) {
823  Command *ConnCmd = AllocaCmd->addDep(
824  DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp);
825  if (ConnCmd)
826  ToEnqueue.push_back(ConnCmd);
827  }
828  updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
829  addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue);
830  }
831  }
832  }
833 
834  Record->MAllocaCommands.push_back(AllocaCmd);
835  Record->MWriteLeaves.push_back(AllocaCmd, ToEnqueue);
836  ++(AllocaCmd->MLeafCounter);
837  for (Command *Cmd : ToCleanUp)
838  cleanupCommand(Cmd);
839  }
840  return AllocaCmd;
841 }
842 
843 // The function sets MemModified flag in record if requirement has write access.
844 void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record,
845  Requirement *Req) {
846  switch (Req->MAccessMode) {
847  case access::mode::write:
848  case access::mode::read_write:
849  case access::mode::discard_write:
850  case access::mode::discard_read_write:
851  case access::mode::atomic:
852  Record->MMemModified = true;
853  break;
854  case access::mode::read:
855  break;
856  }
857 }
858 
859 EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd(
860  Command *Cmd, const std::vector<Requirement *> &Reqs,
861  const QueueImplPtr &Queue, Command::BlockReason Reason,
862  std::vector<Command *> &ToEnqueue, const bool AddDepsToLeaves) {
863  EmptyCommand *EmptyCmd =
864  new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue());
865 
866  if (!EmptyCmd)
867  throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY);
868 
869  EmptyCmd->MIsBlockable = true;
870  EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked;
871  EmptyCmd->MBlockReason = Reason;
872 
873  for (Requirement *Req : Reqs) {
874  MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue);
875  AllocaCommandBase *AllocaCmd =
876  getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
877  EmptyCmd->addRequirement(Cmd, AllocaCmd, Req);
878  }
879  // addRequirement above calls addDep, which already adds EmptyCmd as a user of
880  // Cmd. Reqs may be empty, so in that case add EmptyCmd as a user explicitly.
881  if (!Reqs.size())
882  Cmd->addUser(EmptyCmd);
883 
884  if (AddDepsToLeaves) {
885  const std::vector<DepDesc> &Deps = Cmd->MDeps;
886  std::vector<Command *> ToCleanUp;
887  for (const DepDesc &Dep : Deps) {
888  const Requirement *Req = Dep.MDepRequirement;
889  MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
890 
891  updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp);
892  addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue);
893  }
894  for (Command *Cmd : ToCleanUp)
895  cleanupCommand(Cmd);
896  }
897 
898  return EmptyCmd;
899 }
900 
901 static bool isInteropHostTask(ExecCGCommand *Cmd) {
902  if (Cmd->getCG().getType() != CG::CGTYPE::CodeplayHostTask)
903  return false;
904 
905  const detail::CGHostTask &HT =
906  static_cast<detail::CGHostTask &>(Cmd->getCG());
907 
908  return HT.MHostTask->isInteropTask();
909 }
910 
911 static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
912  std::unordered_map<SYCLMemObjI *, access::mode> CombinedModes;
913  bool HasDuplicateMemObjects = false;
914  for (const Requirement *Req : Reqs) {
915  auto Result = CombinedModes.insert(
916  std::make_pair(Req->MSYCLMemObj, Req->MAccessMode));
917  if (!Result.second) {
918  Result.first->second =
919  combineAccessModes(Result.first->second, Req->MAccessMode);
920  HasDuplicateMemObjects = true;
921  }
922  }
923 
924  if (!HasDuplicateMemObjects)
925  return;
926  for (Requirement *Req : Reqs) {
927  Req->MAccessMode = CombinedModes[Req->MSYCLMemObj];
928  }
929 }
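// Example: if a command group uses the same buffer through one read accessor
// and one write accessor, both requirements end up with
// access::mode::read_write here, so the first alloca is created with a mode
// that covers both uses.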
930 
931 Scheduler::GraphBuildResult
932 Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
933  const QueueImplPtr &Queue,
934  std::vector<Command *> &ToEnqueue) {
935  std::vector<Requirement *> &Reqs = CommandGroup->MRequirements;
936  std::vector<detail::EventImplPtr> &Events = CommandGroup->MEvents;
937 
938  auto NewCmd = std::make_unique<ExecCGCommand>(std::move(CommandGroup), Queue);
939  if (!NewCmd)
940  throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY);
941 
942  // Host tasks cannot participate in fusion. They take the regular route. If
943  // they create any requirement or event dependency on any of the kernels in
944  // the fusion list, this will lead to cancellation of the fusion in the
945  // GraphProcessor.
946  auto QUniqueID = std::hash<QueueImplPtr>()(Queue);
947  if (isInFusionMode(QUniqueID) && !NewCmd->isHostTask()) {
948  auto *FusionCmd = findFusionList(QUniqueID)->second.get();
949 
950  bool dependsOnFusion = false;
951  for (auto Ev = Events.begin(); Ev != Events.end();) {
952  auto *EvDepCmd = static_cast<Command *>((*Ev)->getCommand());
953  if (!EvDepCmd) {
954  ++Ev; continue; // Advance the iterator here to avoid an infinite loop.
955  }
956  // Handle event dependencies on any commands part of another active
957  // fusion.
958  if (EvDepCmd->getQueue() != Queue && isPartOfActiveFusion(EvDepCmd)) {
959  printFusionWarning("Aborting fusion because of event dependency from a "
960  "different fusion");
961  cancelFusion(EvDepCmd->getQueue(), ToEnqueue);
962  }
963  // Check if this command depends on the placeholder command for the fusion
964  // it itself participates in.
965  if (EvDepCmd == FusionCmd) {
966  Ev = Events.erase(Ev);
967  dependsOnFusion = true;
968  } else {
969  ++Ev;
970  }
971  }
972 
973  // If this command has an explicit event dependency on the placeholder
974  // command for this fusion (because it used depends_on on the event returned
975  // by submitting another kernel to this fusion earlier), add a dependency on
976  // all the commands in the fusion list so far.
977  if (dependsOnFusion) {
978  for (auto *Cmd : FusionCmd->getFusionList()) {
979  Events.push_back(Cmd->getEvent());
980  }
981  }
982 
983  // Add the kernel to the graph, but delay the enqueue of any auxiliary
984  // commands (e.g., allocations) resulting from that process by adding them
985  // to the list of auxiliary commands of the fusion command.
986  createGraphForCommand(NewCmd.get(), NewCmd->getCG(),
987  isInteropHostTask(NewCmd.get()), Reqs, Events, Queue,
988  FusionCmd->auxiliaryCommands());
989 
990  // Set the fusion command, so we recognize when another command depends on a
991  // kernel in the fusion list.
992  FusionCmd->addToFusionList(NewCmd.get());
993  NewCmd->MFusionCmd = FusionCmd;
994  std::vector<Command *> ToCleanUp;
995  // Add an event dependency from the fusion placeholder command to the new
996  // kernel.
997  auto ConnectionCmd = FusionCmd->addDep(NewCmd->getEvent(), ToCleanUp);
998  if (ConnectionCmd) {
999  FusionCmd->auxiliaryCommands().push_back(ConnectionCmd);
1000  }
1001  return {NewCmd.release(), FusionCmd->getEvent(), false};
1002  }
1003  createGraphForCommand(NewCmd.get(), NewCmd->getCG(),
1004  isInteropHostTask(NewCmd.get()), Reqs, Events, Queue,
1005  ToEnqueue);
1006  auto Event = NewCmd->getEvent();
1007  return {NewCmd.release(), Event, true};
1008 }
1009 
1010 void Scheduler::GraphBuilder::createGraphForCommand(
1011  Command *NewCmd, CG &CG, bool isInteropTask,
1012  std::vector<Requirement *> &Reqs,
1013  const std::vector<detail::EventImplPtr> &Events, QueueImplPtr Queue,
1014  std::vector<Command *> &ToEnqueue) {
1015 
1016  if (MPrintOptionsArray[BeforeAddCG])
1017  printGraphAsDot("before_addCG");
1018 
1019  // If there are multiple requirements for the same memory object, its
1020  // AllocaCommand creation will be dependent on the access mode of the first
1021  // requirement. Combine these access modes to take all of them into account.
1022  combineAccessModesOfReqs(Reqs);
1023  std::vector<Command *> ToCleanUp;
1024  for (Requirement *Req : Reqs) {
1025  MemObjRecord *Record = nullptr;
1026  AllocaCommandBase *AllocaCmd = nullptr;
1027 
1028  bool isSameCtx = false;
1029 
1030  {
1031  const QueueImplPtr &QueueForAlloca =
1032  isInteropTask ? static_cast<detail::CGHostTask &>(CG).MQueue : Queue;
1033 
1034  Record = getOrInsertMemObjRecord(QueueForAlloca, Req, ToEnqueue);
1035  markModifiedIfWrite(Record, Req);
1036 
1037  AllocaCmd =
1038  getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue);
1039 
1040  isSameCtx =
1041  sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext);
1042  }
1043 
1044  // If there is an alloca command, we need to check whether the latest memory
1045  // is in the required context.
1046  if (isSameCtx) {
1047  // If the memory is already in the required host context, check if the
1048  // required access mode is valid, remap if not.
1049  if (Record->MCurContext->is_host() &&
1050  !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
1051  remapMemoryObject(Record, Req, AllocaCmd, ToEnqueue);
1052  } else {
1053  // Cannot directly copy memory from OpenCL device to OpenCL device -
1054  // create two copies: device->host and host->device.
1055  bool NeedMemMoveToHost = false;
1056  auto MemMoveTargetQueue = Queue;
1057 
1058  if (isInteropTask) {
1059  const detail::CGHostTask &HT = static_cast<detail::CGHostTask &>(CG);
1060 
1061  if (HT.MQueue->getContextImplPtr() != Record->MCurContext) {
1062  NeedMemMoveToHost = true;
1063  MemMoveTargetQueue = HT.MQueue;
1064  }
1065  } else if (!Queue->is_host() && !Record->MCurContext->is_host())
1066  NeedMemMoveToHost = true;
1067 
1068  if (NeedMemMoveToHost)
1069  insertMemoryMove(Record, Req,
1070  Scheduler::getInstance().getDefaultHostQueue(),
1071  ToEnqueue);
1072  insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue);
1073  }
1074  std::set<Command *> Deps =
1075  findDepsForReq(Record, Req, Queue->getContextImplPtr());
1076 
1077  for (Command *Dep : Deps) {
1078  if (Dep != NewCmd) {
1079  Command *ConnCmd =
1080  NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}, ToCleanUp);
1081  if (ConnCmd)
1082  ToEnqueue.push_back(ConnCmd);
1083  }
1084  }
1085  }
1086 
1087  // Set new command as user for dependencies and update leaves.
1088  // Node dependencies can be modified further when adding the node to leaves,
1089  // iterate over their copy.
1090  // FIXME employ a reference here to eliminate copying of a vector
1091  std::vector<DepDesc> Deps = NewCmd->MDeps;
1092  for (DepDesc &Dep : Deps) {
1093  const Requirement *Req = Dep.MDepRequirement;
1094  MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
1095  updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode, ToCleanUp);
1096  addNodeToLeaves(Record, NewCmd, Req->MAccessMode, ToEnqueue);
1097  }
1098 
1099  // Register all the events as dependencies
1100  for (detail::EventImplPtr e : Events) {
1101  if (e->getCommand() && e->getCommand() == NewCmd) {
1102  continue;
1103  }
1104  if (Command *ConnCmd = NewCmd->addDep(e, ToCleanUp))
1105  ToEnqueue.push_back(ConnCmd);
1106  }
1107 
1108  if (MPrintOptionsArray[AfterAddCG])
1109  printGraphAsDot("after_addCG");
1110 
1111  for (Command *Cmd : ToCleanUp) {
1112  cleanupCommand(Cmd);
1113  }
1114 }
1115 
1116 void Scheduler::GraphBuilder::decrementLeafCountersForRecord(
1117  MemObjRecord *Record) {
1118  for (Command *Cmd : Record->MReadLeaves) {
1119  --(Cmd->MLeafCounter);
1120  if (Cmd->readyForCleanup())
1121  cleanupCommand(Cmd);
1122  }
1123  for (Command *Cmd : Record->MWriteLeaves) {
1124  --(Cmd->MLeafCounter);
1125  if (Cmd->readyForCleanup())
1126  cleanupCommand(Cmd);
1127  }
1128 }
1129 
1130 void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) {
1131  std::vector<AllocaCommandBase *> &AllocaCommands = Record->MAllocaCommands;
1132  if (AllocaCommands.empty())
1133  return;
1134 
1135  assert(MCmdsToVisit.empty());
1136  MVisitedCmds.clear();
1137 
1138  // First, mark all allocas for deletion and their direct users for traversal
1139  // Dependencies of the users will be cleaned up during the traversal
1140  for (Command *AllocaCmd : AllocaCommands) {
1141  markNodeAsVisited(AllocaCmd, MVisitedCmds);
1142 
1143  for (Command *UserCmd : AllocaCmd->MUsers)
1144  // Linked alloca cmd may be in users of this alloca. We're not going to
1145  // visit it.
1146  if (UserCmd->getType() != Command::CommandType::ALLOCA)
1147  MCmdsToVisit.push(UserCmd);
1148  else
1149  markNodeAsVisited(UserCmd, MVisitedCmds);
1150 
1151  AllocaCmd->MMarks.MToBeDeleted = true;
1152  // These commands will be deleted later, clear users now to avoid
1153  // updating them during edge removal
1154  AllocaCmd->MUsers.clear();
1155  }
1156 
1157  // Make sure the Linked Allocas are marked visited by the previous walk.
1158  // Remove allocation commands from the users of their dependencies.
1159  for (AllocaCommandBase *AllocaCmd : AllocaCommands) {
1160  AllocaCommandBase *LinkedCmd = AllocaCmd->MLinkedAllocaCmd;
1161 
1162  if (LinkedCmd) {
1163  assert(LinkedCmd->MMarks.MVisited);
1164  }
1165 
1166  for (DepDesc &Dep : AllocaCmd->MDeps)
1167  if (Dep.MDepCommand)
1168  Dep.MDepCommand->MUsers.erase(AllocaCmd);
1169  }
1170 
1171  // Traverse the graph using BFS
1172  while (!MCmdsToVisit.empty()) {
1173  Command *Cmd = MCmdsToVisit.front();
1174  MCmdsToVisit.pop();
1175 
1176  if (!markNodeAsVisited(Cmd, MVisitedCmds))
1177  continue;
1178 
1179  for (Command *UserCmd : Cmd->MUsers)
1180  if (UserCmd->getType() != Command::CommandType::ALLOCA)
1181  MCmdsToVisit.push(UserCmd);
1182 
1183  // Delete all dependencies on any allocations being removed
1184  // Track which commands should have their users updated
1185  std::map<Command *, bool> ShouldBeUpdated;
1186  auto NewEnd = std::remove_if(
1187  Cmd->MDeps.begin(), Cmd->MDeps.end(), [&](const DepDesc &Dep) {
1188  if (std::find(AllocaCommands.begin(), AllocaCommands.end(),
1189  Dep.MAllocaCmd) != AllocaCommands.end()) {
1190  ShouldBeUpdated.insert({Dep.MDepCommand, true});
1191  return true;
1192  }
1193  ShouldBeUpdated[Dep.MDepCommand] = false;
1194  return false;
1195  });
1196  Cmd->MDeps.erase(NewEnd, Cmd->MDeps.end());
1197 
1198  // Update users of removed dependencies
1199  for (auto DepCmdIt : ShouldBeUpdated) {
1200  if (!DepCmdIt.second)
1201  continue;
1202  DepCmdIt.first->MUsers.erase(Cmd);
1203  }
1204 
1205  // If all dependencies have been removed this way, mark the command for
1206  // deletion
1207  if (Cmd->MDeps.empty()) {
1208  Cmd->MUsers.clear();
1209  // Do not delete the node if it's scheduled for post-enqueue cleanup to
1210  // avoid double free.
1211  if (!Cmd->MMarkedForCleanup)
1212  Cmd->MMarks.MToBeDeleted = true;
1213  }
1214  }
1215 
1216  handleVisitedNodes(MVisitedCmds);
1217 }
1218 
1219 void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd,
1220  bool AllowUnsubmitted) {
1221  if (SYCLConfig<SYCL_DISABLE_POST_ENQUEUE_CLEANUP>::get()) {
1222  static bool DeprWarningPrinted = false;
1223  if (!DeprWarningPrinted) {
1224  std::cerr << "WARNING: The environment variable "
1225  "SYCL_DISABLE_POST_ENQUEUE_CLEANUP is deprecated. Please "
1226  "use SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP instead.\n";
1227  DeprWarningPrinted = true;
1228  }
1229  return;
1230  }
1231  if (SYCLConfig<SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::get())
1232  return;
1233 
1234  assert(Cmd->MLeafCounter == 0 &&
1235  (Cmd->isSuccessfullyEnqueued() || AllowUnsubmitted));
1236  Command::CommandType CmdT = Cmd->getType();
1237 
1238  assert(CmdT != Command::ALLOCA && CmdT != Command::ALLOCA_SUB_BUF);
1239  assert(CmdT != Command::RELEASE);
1240  (void)CmdT;
1241 
1242  for (Command *UserCmd : Cmd->MUsers) {
1243  for (DepDesc &Dep : UserCmd->MDeps) {
1244  // Link the users of the command to the alloca command(s) instead
1245  if (Dep.MDepCommand == Cmd) {
1246  // ... unless the user is the alloca itself.
1247  if (Dep.MAllocaCmd == UserCmd) {
1248  Dep.MDepCommand = nullptr;
1249  } else {
1250  Dep.MDepCommand = Dep.MAllocaCmd;
1251  Dep.MDepCommand->MUsers.insert(UserCmd);
1252  }
1253  }
1254  }
1255  }
1256  // Update dependency users
1257  for (DepDesc &Dep : Cmd->MDeps) {
1258  Command *DepCmd = Dep.MDepCommand;
1259  DepCmd->MUsers.erase(Cmd);
1260  }
1261 
1262  if (Cmd->getType() == Command::FUSION &&
1263  !static_cast<KernelFusionCommand *>(Cmd)->readyForDeletion()) {
1264  // Fusion commands might still be needed because fusion might be aborted,
1265  // but a later call to complete_fusion still needs to be able to return a
1266  // valid event. Clean-up of fusion commands is therefore explicitly handled
1267  // by start fusion.
1268  return;
1269  }
1270  Cmd->getEvent()->setCommand(nullptr);
1271  delete Cmd;
1272 }
1273 
1274 void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
1275  const auto It = std::find_if(
1276  MMemObjs.begin(), MMemObjs.end(),
1277  [MemObject](const SYCLMemObjI *Obj) { return Obj == MemObject; });
1278  if (It != MMemObjs.end())
1279  MMemObjs.erase(It);
1280  MemObject->MRecord.reset();
1281 }
1282 
1283 // Make Cmd depend on DepEvent from different context. Connection is performed
1284 // via distinct ConnectCmd with host task command group on host queue. Cmd will
1285 // depend on ConnectCmd's host event.
1286 // DepEvent may not have a command associated with it in at least two cases:
1287 // - the command was deleted upon cleanup process;
1288 // - DepEvent is user event.
1289 // In both of these cases the only thing we can do is to make ConnectCmd depend
1290 // on DepEvent.
1291 // Otherwise, when there is a command associated with DepEvent, we make
1292 // ConnectCmd depend on this command. If there is a valid, i.e. non-null,
1293 // requirement in Dep we make ConnectCmd depend on DepEvent's command with this
1294 // requirement.
1295 // Optionality of Dep is set by Dep.MDepCommand equal to nullptr.
1296 Command *Scheduler::GraphBuilder::connectDepEvent(
1297  Command *const Cmd, const EventImplPtr &DepEvent, const DepDesc &Dep,
1298  std::vector<Command *> &ToCleanUp) {
1299  assert(Cmd->getWorkerContext() != DepEvent->getContextImpl());
1300 
1301  // construct Host Task type command manually and make it depend on DepEvent
1302  ExecCGCommand *ConnectCmd = nullptr;
1303 
1304  try {
1305  std::unique_ptr<detail::HostTask> HT(new detail::HostTask);
1306  std::unique_ptr<detail::CG> ConnectCG(new detail::CGHostTask(
1307  std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {},
1308  /* ArgsStorage = */ {}, /* AccStorage = */ {},
1309  /* SharedPtrStorage = */ {}, /* Requirements = */ {},
1310  /* DepEvents = */ {DepEvent}, CG::CodeplayHostTask,
1311  /* Payload */ {}));
1312  ConnectCmd = new ExecCGCommand(
1313  std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue());
1314  } catch (const std::bad_alloc &) {
1315  throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY);
1316  }
1317 
1318  if (Dep.MDepRequirement) {
1319  // make ConnectCmd depend on requirement
1320  // Dismiss the result here as it's not a connection now,
1321  // 'cause ConnectCmd is host one
1322  (void)ConnectCmd->addDep(Dep, ToCleanUp);
1323  assert(reinterpret_cast<Command *>(DepEvent->getCommand()) ==
1324  Dep.MDepCommand);
1325  // add user to Dep.MDepCommand is already performed beyond this if branch
1326  {
1327  DepDesc DepOnConnect = Dep;
1328  DepOnConnect.MDepCommand = ConnectCmd;
1329 
1330  // Dismiss the result here as it's not a connection now,
1331  // 'cause ConnectCmd is host one
1332  std::ignore = Cmd->addDep(DepOnConnect, ToCleanUp);
1333  }
1334  } else {
1335  // On the other path (with a requirement) addDep registers the user itself;
1336  // here we have to add ConnectCmd as a user of DepCmd explicitly.
1337  if (Command *DepCmd = reinterpret_cast<Command *>(DepEvent->getCommand()))
1338  DepCmd->addUser(ConnectCmd);
1339 
1340  std::ignore = ConnectCmd->addDep(DepEvent, ToCleanUp);
1341 
1342  std::ignore = Cmd->addDep(ConnectCmd->getEvent(), ToCleanUp);
1343 
1344  ConnectCmd->addUser(Cmd);
1345  }
1346 
1347  return ConnectCmd;
1348 }
1349 
1350 void Scheduler::GraphBuilder::startFusion(QueueImplPtr Queue) {
1351  auto QUniqueID = std::hash<QueueImplPtr>()(Queue);
1352  if (isInFusionMode(QUniqueID)) {
1353  throw sycl::exception{sycl::make_error_code(sycl::errc::invalid),
1354  "Queue already in fusion mode"};
1355  }
1356  auto OldFusionCmd = findFusionList(QUniqueID);
1357  if (OldFusionCmd != MFusionMap.end()) {
1358  // If fusion was used on this queue previously, the old fusion command might
1359  // still be around to make sure that even after
1360  // cancellation of the fusion due to synchronization, complete_fusion is
1361  // still able to return a valid event.
1362  OldFusionCmd->second->setFusionStatus(
1363  KernelFusionCommand::FusionStatus::DELETED);
1364  cleanupCommand(OldFusionCmd->second.release());
1365  MFusionMap.erase(OldFusionCmd);
1366  }
1367  MFusionMap.emplace(QUniqueID, std::make_unique<KernelFusionCommand>(Queue));
1368 }
1369 
1370 void Scheduler::GraphBuilder::removeNodeFromGraph(
1371  Command *Node, std::vector<Command *> &ToEnqueue) {
1372  // Remove the placeholder command as leaf of all its requirements and from the
1373  // user list of all its dependencies.
1374  for (auto &Dep : Node->MDeps) {
1375  auto AccessMode = Dep.MDepRequirement->MAccessMode;
1376  auto *Record = getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj);
1377 
1378  Node->MLeafCounter -= Record->MReadLeaves.remove(Node);
1379  Node->MLeafCounter -= Record->MWriteLeaves.remove(Node);
1380  // If the placeholder had a write-requirement on this record, we need to
1381  // restore the previous leaves.
1382  if (AccessMode != access::mode::read) {
1383  for (auto PrevDep : Dep.MDepCommand->MDeps) {
1384  auto *DepReq = PrevDep.MDepRequirement;
1385  auto *DepRecord = getMemObjRecord(DepReq->MSYCLMemObj);
1386  if (DepRecord == Record) {
1387  // Need to restore this as a leaf, because we pushed it from the
1388  // leaves when adding the placeholder command.
1389  assert(Dep.MDepCommand);
1390  addNodeToLeaves(Record, Dep.MDepCommand, DepReq->MAccessMode,
1391  ToEnqueue);
1392  }
1393  }
1394  }
1395  Dep.MDepCommand->MUsers.erase(Node);
1396  }
1397 
1398  Node->MDeps.clear();
1399 }
1400 
1401 void Scheduler::GraphBuilder::cancelFusion(QueueImplPtr Queue,
1402  std::vector<Command *> &ToEnqueue) {
1403  auto QUniqueID = std::hash<QueueImplPtr>()(Queue);
1404  if (!isInFusionMode(QUniqueID)) {
1405  return;
1406  }
1407  auto FusionList = findFusionList(QUniqueID);
1408 
1409  auto *PlaceholderCmd = (*FusionList).second.get();
1410 
1411  // Enqueue all the kernels/commands from the fusion list
1412  auto FusedCmdList = PlaceholderCmd->getFusionList();
1413  ToEnqueue.insert(ToEnqueue.end(), FusedCmdList.begin(), FusedCmdList.end());
1414 
1415  // The commands establishing an event dependency between the fusion
1416  // placeholder command and the individual kernels need to be enqueued.
1417  ToEnqueue.insert(ToEnqueue.end(), PlaceholderCmd->auxiliaryCommands().begin(),
1418  PlaceholderCmd->auxiliaryCommands().end());
1419 
1420  ToEnqueue.push_back(PlaceholderCmd);
1421 
1422  if (MPrintOptionsArray[AfterFusionCancel]) {
1423  printGraphAsDot("after_fusionCancel");
1424  }
1425 
1426  // Set the status for the fusion command
1427  PlaceholderCmd->setFusionStatus(KernelFusionCommand::FusionStatus::CANCELLED);
1428 }
1429 
1430 static bool isPartOfFusion(Command *Cmd, KernelFusionCommand *Fusion) {
1431  if (Cmd->getType() == Command::RUN_CG) {
1432  return static_cast<ExecCGCommand *>(Cmd)->MFusionCmd == Fusion;
1433  }
1434  return false;
1435 }
1436 
1437 static bool checkForCircularDependency(Command *, bool, KernelFusionCommand *);
1438 
1439 static bool createsCircularDependency(Command *Cmd, bool PredPartOfFusion,
1440  KernelFusionCommand *Fusion) {
1441  if (isPartOfFusion(Cmd, Fusion)) {
1442  // If this is part of the fusion and the predecessor also was, we can stop
1443  // the traversal here. A direct dependency between two kernels in the same
1444  // fusion will never form a cyclic dependency and by iterating over all
1445  // commands in a fusion, we will detect any cycles originating from the
1446  // current command.
1447  // If the predecessor was not part of the fusion, but the current command
1448  // is, we have found a potential cycle in the dependency graph.
1449  return !PredPartOfFusion;
1450  }
1451  return checkForCircularDependency(Cmd, false, Fusion);
1452 }
1453 
1454 static bool checkForCircularDependency(Command *Cmd, bool IsPartOfFusion,
1455  KernelFusionCommand *Fusion) {
1456  // Check the requirement dependencies.
1457  for (auto &Dep : Cmd->MDeps) {
1458  auto *DepCmd = Dep.MDepCommand;
1459  if (!DepCmd) {
1460  continue;
1461  }
1462  if (createsCircularDependency(DepCmd, IsPartOfFusion, Fusion)) {
1463  return true;
1464  }
1465  }
1466  for (auto &Ev : Cmd->getPreparedDepsEvents()) {
1467  auto *EvDepCmd = static_cast<Command *>(Ev->getCommand());
1468  if (!EvDepCmd) {
1469  continue;
1470  }
1471  if (createsCircularDependency(EvDepCmd, IsPartOfFusion, Fusion)) {
1472  return true;
1473  }
1474  }
1475  for (auto &Ev : Cmd->getPreparedHostDepsEvents()) {
1476  auto *EvDepCmd = static_cast<Command *>(Ev->getCommand());
1477  if (!EvDepCmd) {
1478  continue;
1479  }
1480  if (createsCircularDependency(EvDepCmd, IsPartOfFusion, Fusion)) {
1481  return true;
1482  }
1483  }
1484  return false;
1485 }
1486 
1487 EventImplPtr
1488 Scheduler::GraphBuilder::completeFusion(QueueImplPtr Queue,
1489  std::vector<Command *> &ToEnqueue,
1490  const property_list &PropList) {
1491  auto QUniqueID = std::hash<QueueImplPtr>()(Queue);
1492 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1493  if (!isInFusionMode(QUniqueID)) {
1494  auto InactiveFusionList = findFusionList(QUniqueID);
1495  if (InactiveFusionList == MFusionMap.end()) {
1496  throw sycl::exception{
1497  sycl::make_error_code(sycl::errc::invalid),
1498  "Calling complete_fusion on a queue not in fusion mode"};
1499  }
1500  return InactiveFusionList->second->getEvent();
1501  }
1502 
1503  auto FusionList = findFusionList(QUniqueID);
1504  auto *PlaceholderCmd = FusionList->second.get();
1505  auto &CmdList = PlaceholderCmd->getFusionList();
1506 
1507  // We need to check if fusing the kernels would create a circular dependency.
1508  // A circular dependency would arise if a kernel in the fusion list
1509  // *indirectly* depends on another kernel in the fusion list. Here, indirectly
1510  // means that the dependency is created through a third command not part of
1511  // the fusion, on which this kernel depends and which in turn depends on
1512  // another kernel in the fusion list.
1513  bool CreatesCircularDep =
1514  std::any_of(CmdList.begin(), CmdList.end(), [&](ExecCGCommand *Cmd) {
1515  return checkForCircularDependency(Cmd, true, PlaceholderCmd);
1516  });
1517  if (CreatesCircularDep) {
1518  // If fusing would create a circular dependency, cancel the fusion.
1519  printFusionWarning(
1520  "Aborting fusion because it would create a circular dependency");
1521  auto LastEvent = PlaceholderCmd->getEvent();
1522  this->cancelFusion(Queue, ToEnqueue);
1523  return LastEvent;
1524  }
1525 
1526  // Call the JIT compiler to generate a new fused kernel.
1527  auto FusedCG = detail::jit_compiler::get_instance().fuseKernels(
1528  Queue, CmdList, PropList);
1529 
1530  if (!FusedCG) {
1531  // If the JIT compiler returns a nullptr, JIT compilation of the fused
1532  // kernel failed. In that case, simply cancel the fusion and run each kernel
1533  // on its own.
1534  auto LastEvent = PlaceholderCmd->getEvent();
1535  this->cancelFusion(Queue, ToEnqueue);
1536  return LastEvent;
1537  }
1538 
1539  // Inherit all event dependencies from the input commands in the fusion list.
1540  std::vector<EventImplPtr> FusedEventDeps;
1541  for (auto *Cmd : CmdList) {
1542  FusedEventDeps.insert(FusedEventDeps.end(),
1543  Cmd->getPreparedDepsEvents().begin(),
1544  Cmd->getPreparedDepsEvents().end());
1545  FusedEventDeps.insert(FusedEventDeps.end(),
1546  Cmd->getPreparedHostDepsEvents().begin(),
1547  Cmd->getPreparedHostDepsEvents().end());
1548  }
1549 
1550  // Remove internal explicit dependencies, i.e., explicit dependencies from one
1551  // kernel in the fusion list to another kernel also in the fusion list.
1552  FusedEventDeps.erase(
1553  std::remove_if(FusedEventDeps.begin(), FusedEventDeps.end(),
1554  [&](EventImplPtr &E) {
1555  if (E->getCommand() == PlaceholderCmd) {
1556  return true;
1557  }
1558  if (E->getCommand() &&
1559  static_cast<Command *>(E->getCommand())->getType() ==
1560  Command::RUN_CG) {
1561  auto *RunCGCmd =
1562  static_cast<ExecCGCommand *>(E->getCommand());
1563  if (RunCGCmd->MFusionCmd == PlaceholderCmd) {
1564  return true;
1565  }
1566  }
1567  return false;
1568  }),
1569  FusedEventDeps.end());
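 
  // For example (illustrative, not taken from this file): an explicit
  // handler::depends_on() from one kernel in the fusion list on the event of
  // another kernel in the same list would, after fusion, amount to the fused
  // kernel depending on itself, so such events are dropped by the filter
  // above; only dependencies on commands outside the fusion are kept.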
1570 
1571  auto FusedKernelCmd =
1572  std::make_unique<ExecCGCommand>(std::move(FusedCG), Queue);
1573 
1574  assert(PlaceholderCmd->MDeps.empty());
1575  // Next, iterate backwards over all the commands in the fusion list and remove
1576  // them from the graph to restore the state from before fusion started, so we
1577  // can add the fused kernel to the graph in the next step.
1578  // Clean up the old commands after successfully fusing them.
1579  for (auto OldCmd = CmdList.rbegin(); OldCmd != CmdList.rend(); ++OldCmd) {
1580  removeNodeFromGraph(*OldCmd, ToEnqueue);
1581  cleanupCommand(*OldCmd, /* AllowUnsubmitted */ true);
1582  }
1583 
1584  createGraphForCommand(FusedKernelCmd.get(), FusedKernelCmd->getCG(), false,
1585  FusedKernelCmd->getCG().MRequirements, FusedEventDeps,
1586  Queue, ToEnqueue);
1587 
1588  ToEnqueue.push_back(FusedKernelCmd.get());
1589 
1590  std::vector<Command *> ToCleanUp;
1591  // Make the placeholder command depend on the execution of the fused kernel
1592  auto *ConnectToPlaceholder =
1593  PlaceholderCmd->addDep(FusedKernelCmd->getEvent(), ToCleanUp);
1594  if (ConnectToPlaceholder) {
1595  ToEnqueue.push_back(ConnectToPlaceholder);
1596  }
1597  for (Command *Cmd : ToCleanUp) {
1598  cleanupCommand(Cmd);
1599  }
1600  ToEnqueue.push_back(PlaceholderCmd);
1601 
1602  if (MPrintOptionsArray[AfterFusionComplete]) {
1603  printGraphAsDot("after_fusionComplete");
1604  }
1605 
1606  // Set the status for the fusion command.
1607  PlaceholderCmd->setFusionStatus(KernelFusionCommand::FusionStatus::COMPLETE);
1608 
1609  return FusedKernelCmd.release()->getEvent();
1610 #else // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1611  printFusionWarning("Kernel fusion not supported by this build");
1612  (void)PropList;
1613  auto FusionList = findFusionList(QUniqueID);
1614  auto *PlaceholderCmd = FusionList->second.get();
1615  auto LastEvent = PlaceholderCmd->getEvent();
1616  this->cancelFusion(Queue, ToEnqueue);
1617  return LastEvent;
1618 #endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1619 }
1620 
1621 bool Scheduler::GraphBuilder::isInFusionMode(QueueIdT Id) {
1622  auto FusionList = findFusionList(Id);
1623  if (FusionList == MFusionMap.end()) {
1624  return false;
1625  }
1626  return FusionList->second->isActive();
1627 }
1628 
1629 } // namespace detail
1630 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
1631 } // namespace sycl
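
The fusion entry points in this file (startFusion earlier in the listing, cancelFusion and completeFusion shown above) are driven from user code through the experimental sycl_ext_codeplay_kernel_fusion extension. The following minimal sketch shows how a fusion region is typically opened and completed; the wrapper-level names (fusion_wrapper, property::queue::enable_fusion, start_fusion, complete_fusion) follow the extension proposal and are assumptions here, not something this file guarantees.

 #include <sycl/sycl.hpp>

 namespace syclex = sycl::ext::codeplay::experimental;

 int main() {
   // Fusion must be requested explicitly when the queue is created.
   sycl::queue Q{sycl::gpu_selector_v,
                 {syclex::property::queue::enable_fusion{}}};

   syclex::fusion_wrapper FW{Q};
   FW.start_fusion(); // Scheduler::GraphBuilder::startFusion

   // Kernels submitted while the queue is in fusion mode are collected behind
   // the KernelFusionCommand placeholder instead of being enqueued directly.
   Q.single_task([]() { /* kernel 1 */ });
   Q.single_task([]() { /* kernel 2 */ });

   // Triggers Scheduler::GraphBuilder::completeFusion: the JIT compiler fuses
   // the collected kernels, or the runtime falls back to cancelFusion (e.g. on
   // a circular dependency or a JIT failure) and runs the kernels unfused.
   FW.complete_fusion();
   Q.wait();
 }

Calling complete_fusion on a queue that is not in fusion mode simply returns the event of the last (inactive) fusion, as handled at the top of completeFusion above.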