16 #include <sycl/feature_test.hpp>
20 inline namespace _V1 {
24 namespace experimental {
39 bool visitNodeDepthFirst(
40 std::shared_ptr<node_impl> Node,
41 std::set<std::shared_ptr<node_impl>> &VisitedNodes,
42 std::deque<std::shared_ptr<node_impl>> &NodeStack,
43 std::function<
bool(std::shared_ptr<node_impl> &,
44 std::deque<std::shared_ptr<node_impl>> &)>
46 auto EarlyReturn = NodeFunc(Node, NodeStack);
50 NodeStack.push_back(Node);
51 Node->MVisited =
true;
52 VisitedNodes.emplace(Node);
53 for (
auto &Successor : Node->MSuccessors) {
54 if (visitNodeDepthFirst(Successor.lock(), VisitedNodes, NodeStack,
69 void sortTopological(std::shared_ptr<node_impl> NodeImpl,
70 std::list<std::shared_ptr<node_impl>> &Schedule,
71 bool PartitionBounded =
false) {
72 for (
auto &Succ : NodeImpl->MSuccessors) {
73 auto NextNode = Succ.lock();
74 if (PartitionBounded &&
75 (NextNode->MPartitionNum != NodeImpl->MPartitionNum)) {
79 if (std::find(Schedule.begin(), Schedule.end(), NextNode) ==
81 sortTopological(NextNode, Schedule, PartitionBounded);
85 Schedule.push_front(NodeImpl);
99 void propagatePartitionUp(std::shared_ptr<node_impl> Node,
int PartitionNum) {
100 if (((Node->MPartitionNum != -1) && (Node->MPartitionNum <= PartitionNum)) ||
101 (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask)) {
104 Node->MPartitionNum = PartitionNum;
105 for (
auto &Predecessor : Node->MPredecessors) {
106 propagatePartitionUp(Predecessor.lock(), PartitionNum);
117 void propagatePartitionDown(
118 std::shared_ptr<node_impl> Node,
int PartitionNum,
119 std::list<std::shared_ptr<node_impl>> &HostTaskList) {
120 if (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask) {
121 if (Node->MPartitionNum != -1) {
122 HostTaskList.push_front(Node);
126 Node->MPartitionNum = PartitionNum;
127 for (
auto &Successor : Node->MSuccessors) {
128 propagatePartitionDown(Successor.lock(), PartitionNum, HostTaskList);
136 bool isPartitionRoot(std::shared_ptr<node_impl> Node) {
137 for (
auto &Predecessor : Node->MPredecessors) {
138 if (Predecessor.lock()->MPartitionNum == Node->MPartitionNum) {
147 std::vector<node> createNodesFromImpls(
148 const std::vector<std::weak_ptr<detail::node_impl>> &Impls) {
149 std::vector<node> Nodes{};
150 Nodes.reserve(Impls.size());
152 for (std::weak_ptr<detail::node_impl> Impl : Impls) {
153 Nodes.push_back(sycl::detail::createSyclObjFromImpl<node>(Impl.lock()));
161 std::vector<node> createNodesFromImpls(
162 const std::vector<std::shared_ptr<detail::node_impl>> &Impls) {
163 std::vector<node> Nodes{};
164 Nodes.reserve(Impls.size());
166 for (std::shared_ptr<detail::node_impl> Impl : Impls) {
167 Nodes.push_back(sycl::detail::createSyclObjFromImpl<node>(Impl));
177 for (
auto &Node :
MRoots) {
178 sortTopological(Node.lock(),
MSchedule,
true);
184 int CurrentPartition = -1;
185 std::list<std::shared_ptr<node_impl>> HostTaskList;
187 for (
auto &Node : MNodeStorage) {
188 if (Node->MCGType == sycl::detail::CG::CodeplayHostTask) {
189 HostTaskList.push_back(Node);
217 while (HostTaskList.size() > 0) {
218 auto Node = HostTaskList.front();
219 HostTaskList.pop_front();
221 for (
auto &Predecessor : Node->MPredecessors) {
222 propagatePartitionUp(Predecessor.lock(), CurrentPartition);
225 Node->MPartitionNum = CurrentPartition;
227 auto TmpSize = HostTaskList.size();
228 for (
auto &Successor : Node->MSuccessors) {
229 propagatePartitionDown(Successor.lock(), CurrentPartition, HostTaskList);
231 if (HostTaskList.size() > TmpSize) {
233 for (
const auto &HT : HostTaskList) {
234 auto HTPartitionNum = HT->MPartitionNum;
235 if (HTPartitionNum != -1) {
238 for (
const auto &NodeImpl : MNodeStorage) {
239 if (NodeImpl->MPartitionNum == Node->MPartitionNum - 1) {
240 NodeImpl->MPartitionNum = HTPartitionNum - 1;
251 int PartitionFinalNum = 0;
252 for (
int i = -1; i <= CurrentPartition; i++) {
253 const std::shared_ptr<partition> &Partition = std::make_shared<partition>();
254 for (
auto &Node : MNodeStorage) {
255 if (Node->MPartitionNum == i) {
256 MPartitionNodes[Node] = PartitionFinalNum;
257 if (isPartitionRoot(Node)) {
258 Partition->MRoots.insert(Node);
262 if (Partition->MRoots.size() > 0) {
263 Partition->schedule();
264 MPartitions.push_back(Partition);
270 if (MPartitions.size() == 0) {
271 MPartitions.push_back(std::make_shared<partition>());
275 for (
const auto &Partition : MPartitions) {
276 MSchedule.insert(MSchedule.end(), Partition->MSchedule.begin(),
277 Partition->MSchedule.end());
281 for (
const auto &Partition : MPartitions) {
282 for (
auto const &Root : Partition->MRoots) {
283 auto RootNode = Root.lock();
284 for (
const auto &Dep : RootNode->MPredecessors) {
285 auto NodeDep = Dep.lock();
286 Partition->MPredecessors.push_back(
287 MPartitions[MPartitionNodes[NodeDep]]);
293 for (
auto &Node : MNodeStorage) {
294 Node->MPartitionNum = -1;
300 for (
auto &MemObj : MMemObjs) {
301 MemObj->markNoLongerBeingUsedInGraph();
305 std::shared_ptr<node_impl> graph_impl::addNodesToExits(
306 const std::shared_ptr<graph_impl> &Impl,
307 const std::list<std::shared_ptr<node_impl>> &NodeList) {
309 std::vector<std::shared_ptr<node_impl>> Inputs;
310 std::vector<std::shared_ptr<node_impl>> Outputs;
311 for (
auto &NodeImpl : NodeList) {
312 if (NodeImpl->MPredecessors.size() == 0) {
313 Inputs.push_back(NodeImpl);
315 if (NodeImpl->MSuccessors.size() == 0) {
316 Outputs.push_back(NodeImpl);
323 if (NodeImpl->MSuccessors.size() == 0) {
324 for (
auto &Input : Inputs) {
325 NodeImpl->registerSuccessor(Input, NodeImpl);
331 for (
auto &Node : NodeList) {
333 addEventForNode(Impl, std::make_shared<sycl::detail::event_impl>(), Node);
336 return this->
add(Impl, Outputs);
339 void graph_impl::addRoot(
const std::shared_ptr<node_impl> &Root) {
347 std::shared_ptr<node_impl>
349 const std::vector<std::shared_ptr<node_impl>> &Dep) {
353 const std::shared_ptr<node_impl> &NodeImpl = std::make_shared<node_impl>();
356 Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end());
360 addDepsToNode(NodeImpl, Deps);
362 addEventForNode(Impl, std::make_shared<sycl::detail::event_impl>(), NodeImpl);
366 std::shared_ptr<node_impl>
368 std::function<
void(
handler &)> CGF,
369 const std::vector<sycl::detail::ArgDesc> &Args,
370 const std::vector<std::shared_ptr<node_impl>> &Dep) {
376 if (Handler.MCGType == sycl::detail::CG::Barrier) {
379 "The sycl_ext_oneapi_enqueue_barrier feature is not available with "
380 "SYCL Graph Explicit API. Please use empty nodes instead.");
384 Handler.MImpl->MUserFacingNodeType !=
386 ? Handler.MImpl->MUserFacingNodeType
390 auto NodeImpl = this->
add(NodeType, std::move(Handler.MGraphNodeCG), Dep);
391 NodeImpl->MNDRangeUsed = Handler.MImpl->MNDRangeUsed;
393 addEventForNode(Impl, std::make_shared<sycl::detail::event_impl>(), NodeImpl);
397 auto &DynamicParams = Handler.MImpl->MDynamicParameters;
401 "dynamic_parameters cannot be registered with graph "
402 "nodes which do not represent kernel executions");
405 for (
auto &[DynamicParam, ArgIndex] : DynamicParams) {
406 DynamicParam->registerNode(NodeImpl, ArgIndex);
412 std::shared_ptr<node_impl>
414 const std::vector<sycl::detail::EventImplPtr> Events) {
416 std::vector<std::shared_ptr<node_impl>> Deps;
419 for (
const auto &Dep : Events) {
420 if (
auto NodeImpl = MEventsMap.find(Dep); NodeImpl != MEventsMap.end()) {
421 Deps.push_back(NodeImpl->second);
424 "Event dependency from handler::depends_on does "
425 "not correspond to a node within the graph");
429 return this->
add(Impl, Deps);
432 std::shared_ptr<node_impl>
434 std::unique_ptr<sycl::detail::CG> CommandGroup,
435 const std::vector<std::shared_ptr<node_impl>> &Dep) {
440 std::set<std::shared_ptr<node_impl>> UniqueDeps;
441 const auto &Requirements = CommandGroup->getRequirements();
442 if (!MAllowBuffers && Requirements.size()) {
444 "Cannot use buffers in a graph without passing the "
445 "assume_buffer_outlives_graph property on "
446 "Graph construction.");
449 for (
auto &Req : Requirements) {
451 auto MemObj =
static_cast<sycl::detail::SYCLMemObjT *
>(Req->MSYCLMemObj);
452 bool WasInserted = MMemObjs.insert(MemObj).second;
454 MemObj->markBeingUsedInGraph();
458 if (Node->hasRequirementDependency(Req)) {
459 bool ShouldAddDep =
true;
462 for (
auto &Succ : Node->MSuccessors) {
463 if (Succ.lock()->hasRequirementDependency(Req)) {
464 ShouldAddDep =
false;
469 UniqueDeps.insert(Node);
476 for (
auto &Dep : CommandGroup->getEvents()) {
477 if (
auto NodeImpl = MEventsMap.find(Dep); NodeImpl != MEventsMap.end()) {
478 UniqueDeps.insert(NodeImpl->second);
481 "Event dependency from handler::depends_on does "
482 "not correspond to a node within the graph");
487 Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end());
490 Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end());
492 const std::shared_ptr<node_impl> &NodeImpl =
493 std::make_shared<node_impl>(NodeType, std::move(CommandGroup));
496 addDepsToNode(NodeImpl, Deps);
499 if (NodeImpl->MCGType == sycl::detail::CG::Barrier) {
500 MExtraDependencies.push_back(NodeImpl);
507 bool AnyQueuesCleared =
false;
508 for (
auto &Queue : MRecordingQueues) {
509 if (
auto ValidQueue = Queue.lock(); ValidQueue) {
510 ValidQueue->setCommandGraph(
nullptr);
511 AnyQueuesCleared =
true;
514 MRecordingQueues.clear();
516 return AnyQueuesCleared;
519 void graph_impl::searchDepthFirst(
520 std::function<
bool(std::shared_ptr<node_impl> &,
521 std::deque<std::shared_ptr<node_impl>> &)>
526 std::set<std::shared_ptr<node_impl>> VisitedNodes;
528 for (
auto &Root :
MRoots) {
529 std::deque<std::shared_ptr<node_impl>> NodeStack;
530 if (visitNodeDepthFirst(Root.lock(), VisitedNodes, NodeStack, NodeFunc)) {
536 for (
auto &Node : VisitedNodes) {
537 Node->MVisited =
false;
541 bool graph_impl::checkForCycles() {
544 bool CycleFound =
false;
545 auto CheckFunc = [&](std::shared_ptr<node_impl> &Node,
546 std::deque<std::shared_ptr<node_impl>> &NodeStack) {
549 if (std::find(NodeStack.begin(), NodeStack.end(), Node) !=
556 searchDepthFirst(CheckFunc);
561 std::shared_ptr<node_impl> Dest) {
566 "make_edge() cannot be called when Src and Dest are the same.");
569 bool SrcFound =
false;
570 bool DestFound =
false;
573 SrcFound |= Node == Src;
574 DestFound |= Node == Dest;
576 if (SrcFound && DestFound) {
583 "Src must be a node inside the graph.");
587 "Dest must be a node inside the graph.");
591 Src->registerSuccessor(Dest, Src);
596 if (Dest->MSuccessors.empty() || !MSkipCycleChecks) {
597 bool CycleFound = checkForCycles();
601 Src->MSuccessors.pop_back();
602 Dest->MPredecessors.pop_back();
605 "Command graphs cannot contain cycles.");
612 std::vector<sycl::detail::EventImplPtr> Events;
615 if (Node->MSuccessors.empty()) {
625 void exec_graph_impl::findRealDeps(
626 std::vector<sycl::detail::pi::PiExtSyncPoint> &Deps,
627 std::shared_ptr<node_impl> CurrentNode,
int ReferencePartitionNum) {
628 if (CurrentNode->isEmpty()) {
629 for (
auto &N : CurrentNode->MPredecessors) {
630 auto NodeImpl = N.lock();
631 findRealDeps(Deps, NodeImpl, ReferencePartitionNum);
635 if (MPartitionNodes[CurrentNode] == ReferencePartitionNum) {
637 auto SyncPoint = MPiSyncPoints.find(CurrentNode);
638 assert(SyncPoint != MPiSyncPoints.end() &&
639 "No sync point has been set for node dependency.");
641 if (std::find(Deps.begin(), Deps.end(), SyncPoint->second) ==
643 Deps.push_back(SyncPoint->second);
652 std::shared_ptr<node_impl> Node) {
653 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps;
654 for (
auto &N : Node->MPredecessors) {
655 findRealDeps(Deps, N.lock(), MPartitionNodes[Node]);
660 Ctx, DeviceImpl, CommandBuffer,
661 *
static_cast<sycl::detail::CGExecKernel *
>((Node->MCommandGroup.get())),
662 Deps, &NewSyncPoint, &NewCommand,
nullptr);
664 MCommandMap[Node] = NewCommand;
666 if (Res != pi_result::PI_SUCCESS) {
668 "Failed to add kernel to PI command-buffer");
675 sycl::context Ctx, std::shared_ptr<sycl::detail::device_impl> DeviceImpl,
677 std::shared_ptr<node_impl> Node) {
680 auto AllocaQueue = std::make_shared<sycl::detail::queue_impl>(
684 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps;
685 for (
auto &N : Node->MPredecessors) {
686 findRealDeps(Deps, N.lock(), MPartitionNodes[Node]);
690 sycl::detail::Scheduler::getInstance().addCG(
691 Node->getCGCopy(), AllocaQueue, CommandBuffer, Deps);
693 MCommandMap[Node] = Event->getCommandBufferCommand();
694 return Event->getSyncPoint();
697 sycl::device Device, std::shared_ptr<partition> &Partition) {
707 ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), &Desc,
709 if (Res != pi_result::PI_SUCCESS) {
713 Partition->MPiCommandBuffers[Device] = OutCommandBuffer;
715 for (
const auto &Node : Partition->MSchedule) {
724 if (type == sycl::detail::CG::Kernel &&
725 Node->MCommandGroup->getRequirements().size() +
726 static_cast<sycl::detail::CGExecKernel *
>(
727 Node->MCommandGroup.get())
730 MPiSyncPoints[Node] =
731 enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node);
733 MPiSyncPoints[Node] =
734 enqueueNode(MContext, DeviceImpl, OutCommandBuffer, Node);
738 MRequirements.insert(MRequirements.end(),
739 Node->MCommandGroup->getRequirements().begin(),
740 Node->MCommandGroup->getRequirements().end());
743 MAccessors.insert(MAccessors.end(),
744 Node->MCommandGroup->getAccStorage().begin(),
745 Node->MCommandGroup->getAccStorage().end());
751 if (Res != pi_result::PI_SUCCESS) {
753 "Failed to finalize PI command-buffer");
758 const std::shared_ptr<graph_impl> &GraphImpl,
760 : MSchedule(), MGraphImpl(GraphImpl), MPiSyncPoints(),
761 MDevice(GraphImpl->getDevice()), MContext(Context), MRequirements(),
763 MIsUpdatable(PropList.
has_property<property::graph::updatable>()) {
769 bool SupportsUpdate = MGraphImpl->getDevice().has(aspect::ext_oneapi_graph);
770 if (!SupportsUpdate) {
772 "Device does not support Command Graph update");
785 for (
auto &Event : MExecutionEvents) {
789 for (
const auto &Partition : MPartitions) {
790 Partition->MSchedule.clear();
791 for (
const auto &Iter : Partition->MPiCommandBuffers) {
792 if (
auto CmdBuf = Iter.second; CmdBuf) {
796 assert(Res == pi_result::PI_SUCCESS);
801 for (
auto &Iter : MCommandMap) {
802 if (
auto Command = Iter.second; Command) {
806 assert(Res == pi_result::PI_SUCCESS);
818 PartitionsExecutionEvents;
820 auto CreateNewEvent([&]() {
821 auto NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
822 NewEvent->setContextImpl(Queue->getContextImplPtr());
823 NewEvent->setStateIncomplete();
828 std::vector<sycl::detail::EventImplPtr> BackupCGDataMEvents;
829 if (MPartitions.size() > 1) {
830 BackupCGDataMEvents = CGData.
MEvents;
832 for (uint32_t currentPartitionsNum = 0;
833 currentPartitionsNum < MPartitions.size(); currentPartitionsNum++) {
834 auto CurrentPartition = MPartitions[currentPartitionsNum];
836 if (currentPartitionsNum > 0) {
837 CGData.
MEvents = BackupCGDataMEvents;
840 for (
auto const &DepPartition : CurrentPartition->MPredecessors) {
841 CGData.
MEvents.push_back(PartitionsExecutionEvents[DepPartition]);
845 CurrentPartition->MPiCommandBuffers[Queue->get_device()];
864 for (std::vector<sycl::detail::EventImplPtr>::iterator It =
865 MExecutionEvents.begin();
866 It != MExecutionEvents.end();) {
868 if (!Event->isCompleted()) {
869 if (Queue->get_device().get_backend() ==
870 sycl::backend::ext_oneapi_level_zero) {
873 auto &AttachedEventsList = Event->getPostCompleteEvents();
874 CGData.
MEvents.reserve(AttachedEventsList.size() + 1);
875 CGData.
MEvents.push_back(Event);
877 for (
auto &AttachedEvent : AttachedEventsList) {
878 CGData.
MEvents.push_back(AttachedEvent);
884 It = MExecutionEvents.erase(It);
888 NewEvent = CreateNewEvent();
893 MRequirements.begin(), MRequirements.end());
900 if (NewEvent !=
nullptr)
901 NewEvent->setHostEnqueueTime();
906 CommandBuffer, Queue->getHandleRef(), 0,
nullptr, OutEvent);
907 if (Res == pi_result::PI_ERROR_INVALID_QUEUE_PROPERTIES) {
910 "Graphs cannot be submitted to a queue which uses "
911 "immediate command lists. Use "
912 "sycl::ext::intel::property::queue::no_immediate_"
913 "command_list to disable them.");
914 }
else if (Res != pi_result::PI_SUCCESS) {
917 "Failed to enqueue event for command buffer submission");
920 std::unique_ptr<sycl::detail::CG> CommandGroup =
921 std::make_unique<sycl::detail::CGExecCommandBuffer>(
922 CommandBuffer,
nullptr, std::move(CGData));
924 NewEvent = sycl::detail::Scheduler::getInstance().addCG(
925 std::move(CommandGroup), Queue);
927 NewEvent->setEventFromSubmittedExecCommandBuffer(
true);
928 }
else if ((CurrentPartition->MSchedule.size() > 0) &&
929 (CurrentPartition->MSchedule.front()->MCGType ==
930 sycl::detail::CG::CGTYPE::CodeplayHostTask)) {
931 auto NodeImpl = CurrentPartition->MSchedule.front();
933 NodeImpl->MCommandGroup->getEvents().insert(
934 NodeImpl->MCommandGroup->getEvents().end(), CGData.
MEvents.begin(),
939 static_cast<sycl::detail::CGHostTask &
>(*NodeImpl->MCommandGroup.get())
942 NewEvent = sycl::detail::Scheduler::getInstance().addCG(
943 NodeImpl->getCGCopy(), Queue);
945 std::vector<std::shared_ptr<sycl::detail::event_impl>> ScheduledEvents;
946 for (
auto &NodeImpl : CurrentPartition->MSchedule) {
947 std::vector<sycl::detail::pi::PiEvent> RawEvents;
951 if (NodeImpl->MCGType == sycl::detail::CG::Kernel &&
952 NodeImpl->MCommandGroup->getRequirements().size() +
953 static_cast<sycl::detail::CGExecKernel *
>(
954 NodeImpl->MCommandGroup.get())
957 sycl::detail::CGExecKernel *CG =
958 static_cast<sycl::detail::CGExecKernel *
>(
959 NodeImpl->MCommandGroup.get());
960 auto OutEvent = CreateNewEvent();
962 Queue, CG->MNDRDesc, CG->MArgs, CG->MKernelBundle,
963 CG->MSyclKernel, CG->MKernelName, RawEvents, OutEvent,
968 if (Res != pi_result::PI_SUCCESS) {
971 "Error during emulated graph command group submission.");
973 ScheduledEvents.push_back(NewEvent);
974 }
else if (!NodeImpl->isEmpty()) {
978 sycl::detail::Scheduler::getInstance().addCG(
979 NodeImpl->getCGCopy(), Queue);
981 ScheduledEvents.push_back(EventImpl);
985 NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
986 NewEvent->setStateIncomplete();
987 NewEvent->getPreparedDepsEvents() = ScheduledEvents;
989 PartitionsExecutionEvents[CurrentPartition] = NewEvent;
994 MExecutionEvents.push_back(NewEvent);
997 for (
auto const &Elem : PartitionsExecutionEvents) {
998 if (Elem.second != NewEvent) {
999 NewEvent->attachEventToComplete(Elem.second);
1003 sycl::detail::createSyclObjFromImpl<sycl::event>(NewEvent);
1007 void exec_graph_impl::duplicateNodes() {
1009 std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;
1011 const std::vector<std::shared_ptr<node_impl>> &ModifiableNodes =
1012 MGraphImpl->MNodeStorage;
1013 std::deque<std::shared_ptr<node_impl>> NewNodes;
1015 for (
size_t i = 0; i < ModifiableNodes.size(); i++) {
1016 auto OriginalNode = ModifiableNodes[i];
1017 std::shared_ptr<node_impl> NodeCopy =
1018 std::make_shared<node_impl>(*OriginalNode);
1022 MIDCache.insert(std::make_pair(OriginalNode->MID, NodeCopy));
1025 NodeCopy->MSuccessors.clear();
1026 NodeCopy->MPredecessors.clear();
1028 NewNodes.push_back(NodeCopy);
1030 NodesMap.insert({OriginalNode, NodeCopy});
1035 for (
size_t i = 0; i < ModifiableNodes.size(); i++) {
1036 auto OriginalNode = ModifiableNodes[i];
1037 auto NodeCopy = NewNodes[i];
1040 for (
auto &NextNode : OriginalNode->MSuccessors) {
1041 auto Successor = NodesMap.at(NextNode.lock());
1042 NodeCopy->registerSuccessor(Successor, NodeCopy);
1049 for (
auto NewNodeIt = NewNodes.rbegin(); NewNodeIt != NewNodes.rend();
1051 auto NewNode = *NewNodeIt;
1055 const std::vector<std::shared_ptr<node_impl>> &SubgraphNodes =
1056 NewNode->MSubGraphImpl->MNodeStorage;
1057 std::deque<std::shared_ptr<node_impl>> NewSubgraphNodes{};
1060 std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>>
1064 for (
size_t i = 0; i < SubgraphNodes.size(); i++) {
1065 auto SubgraphNode = SubgraphNodes[i];
1066 auto NodeCopy = std::make_shared<node_impl>(*SubgraphNode);
1069 MIDCache.insert(std::make_pair(SubgraphNode->MID, NodeCopy));
1071 NewSubgraphNodes.push_back(NodeCopy);
1072 SubgraphNodesMap.insert({SubgraphNode, NodeCopy});
1073 NodeCopy->MSuccessors.clear();
1074 NodeCopy->MPredecessors.clear();
1078 for (
size_t i = 0; i < SubgraphNodes.size(); i++) {
1079 auto SubgraphNode = SubgraphNodes[i];
1080 auto NodeCopy = NewSubgraphNodes[i];
1082 for (
auto &NextNode : SubgraphNode->MSuccessors) {
1083 auto Successor = SubgraphNodesMap.at(NextNode.lock());
1084 NodeCopy->registerSuccessor(Successor, NodeCopy);
1089 std::vector<std::shared_ptr<node_impl>> Inputs;
1090 std::vector<std::shared_ptr<node_impl>> Outputs;
1091 for (
auto &NodeImpl : NewSubgraphNodes) {
1092 if (NodeImpl->MPredecessors.size() == 0) {
1093 Inputs.push_back(NodeImpl);
1095 if (NodeImpl->MSuccessors.size() == 0) {
1096 Outputs.push_back(NodeImpl);
1104 for (
auto &PredNodeWeak : NewNode->MPredecessors) {
1105 auto PredNode = PredNodeWeak.lock();
1106 auto &Successors = PredNode->MSuccessors;
1109 Successors.erase(std::remove_if(Successors.begin(), Successors.end(),
1110 [NewNode](
auto WeakNode) {
1111 return WeakNode.lock() == NewNode;
1117 for (
auto &Input : Inputs) {
1118 PredNode->registerSuccessor(Input, PredNode);
1123 for (
auto &SuccNodeWeak : NewNode->MSuccessors) {
1124 auto SuccNode = SuccNodeWeak.lock();
1125 auto &Predecessors = SuccNode->MPredecessors;
1128 Predecessors.erase(std::remove_if(Predecessors.begin(),
1130 [NewNode](
auto WeakNode) {
1131 return WeakNode.lock() == NewNode;
1133 Predecessors.end());
1137 for (
auto &Output : Outputs) {
1138 Output->registerSuccessor(SuccNode, Output);
1144 auto OldPositionIt =
1145 NewNodes.erase(std::find(NewNodes.begin(), NewNodes.end(), NewNode));
1148 auto InsertIt = NewNodes.insert(OldPositionIt, NewSubgraphNodes.begin(),
1149 NewSubgraphNodes.end());
1152 NewNodeIt = std::make_reverse_iterator(std::next(InsertIt));
1156 MNodeStorage.insert(MNodeStorage.begin(), NewNodes.begin(), NewNodes.end());
1161 if (MDevice != GraphImpl->getDevice()) {
1164 "Cannot update using a graph created with a different device.");
1166 if (MContext != GraphImpl->getContext()) {
1169 "Cannot update using a graph created with a different context.");
1172 if (MNodeStorage.size() != GraphImpl->MNodeStorage.size()) {
1174 "Cannot update using a graph with a different "
1175 "topology. Mismatch found in the number of nodes.");
1177 for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
1178 if (MNodeStorage[i]->MSuccessors.size() !=
1179 GraphImpl->MNodeStorage[i]->MSuccessors.size() ||
1180 MNodeStorage[i]->MPredecessors.size() !=
1181 GraphImpl->MNodeStorage[i]->MPredecessors.size()) {
1184 "Cannot update using a graph with a different topology. Mismatch "
1185 "found in the number of edges.");
1188 if (MNodeStorage[i]->MCGType != GraphImpl->MNodeStorage[i]->MCGType) {
1191 "Cannot update using a graph with mismatched node types. Each pair "
1192 "of nodes being updated must have the same type");
1197 for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
1199 std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i]));
1202 update(GraphImpl->MNodeStorage);
1206 this->
update(std::vector<std::shared_ptr<node_impl>>{Node});
1210 const std::vector<std::shared_ptr<node_impl>> Nodes) {
1212 if (!MIsUpdatable) {
1214 "update() cannot be called on a executable graph "
1215 "which was not created with property::updatable");
1221 bool NeedScheduledUpdate =
false;
1222 std::vector<sycl::detail::AccessorImplHost *> UpdateRequirements;
1225 UpdateRequirements.reserve(MRequirements.size());
1226 for (
auto &Node : Nodes) {
1228 if (MIDCache.count(Node->getID()) == 0) {
1231 "Node passed to update() is not part of the graph.");
1233 if (Node->MCGType != sycl::detail::CG::Kernel) {
1237 if (Node->MCommandGroup->getRequirements().size() == 0) {
1240 NeedScheduledUpdate =
true;
1242 UpdateRequirements.insert(UpdateRequirements.end(),
1243 Node->MCommandGroup->getRequirements().begin(),
1244 Node->MCommandGroup->getRequirements().end());
1249 for (
auto It = MExecutionEvents.begin(); It != MExecutionEvents.end();) {
1250 if ((*It)->isCompleted()) {
1251 It = MExecutionEvents.erase(It);
1259 NeedScheduledUpdate |= MExecutionEvents.size() > 0;
1261 if (NeedScheduledUpdate) {
1262 auto AllocaQueue = std::make_shared<sycl::detail::queue_impl>(
1267 sycl::detail::Scheduler::getInstance().addCommandGraphUpdate(
1268 this, Nodes, AllocaQueue, UpdateRequirements, MExecutionEvents);
1270 for (
auto &Node : Nodes) {
1276 MRequirements.clear();
1277 for (
auto &Node : MNodeStorage) {
1278 MRequirements.insert(MRequirements.end(),
1279 Node->MCommandGroup->getRequirements().begin(),
1280 Node->MCommandGroup->getRequirements().end());
1291 *(
static_cast<sycl::detail::CGExecKernel *
>(Node->MCommandGroup.get()));
1293 std::vector<sycl::detail::ArgDesc> NodeArgs = ExecCG.getArguments();
1295 auto NDRDesc = ExecCG.MNDRDesc;
1298 auto Kernel = ExecCG.MSyclKernel;
1300 std::shared_ptr<sycl::detail::kernel_impl> SyclKernelImpl =
nullptr;
1309 auto KernelName = ExecCG.MKernelName;
1311 sycl::detail::ProgramManager::getInstance().getSYCLKernelID(KernelName);
1315 PiKernel = SyclKernelImpl->getHandleRef();
1316 EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
1317 }
else if (Kernel !=
nullptr) {
1319 EliminatedArgMask = Kernel->getKernelArgMask();
1322 sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
1323 ContextImpl, DeviceImpl, ExecCG.MKernelName);
1327 std::vector<sycl::detail::ArgDesc> MaskedArgs;
1328 MaskedArgs.reserve(NodeArgs.size());
1331 EliminatedArgMask, NodeArgs,
1332 [&MaskedArgs](sycl::detail::ArgDesc &Arg,
int NextTrueIndex) {
1333 MaskedArgs.emplace_back(Arg.MType, Arg.MPtr, Arg.MSize, NextTrueIndex);
1339 size_t RequiredWGSize[3] = {0, 0, 0};
1340 size_t *LocalSize =
nullptr;
1342 if (NDRDesc.LocalSize[0] != 0)
1343 LocalSize = &NDRDesc.LocalSize[0];
1346 PiKernel, DeviceImpl->getHandleRef(),
1351 const bool EnforcedLocalSize =
1352 (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
1353 RequiredWGSize[2] != 0);
1354 if (EnforcedLocalSize)
1355 LocalSize = RequiredWGSize;
1360 std::vector<pi_ext_command_buffer_update_memobj_arg_desc_t> MemobjDescs;
1361 std::vector<pi_ext_command_buffer_update_pointer_arg_desc_t> PtrDescs;
1362 std::vector<pi_ext_command_buffer_update_value_arg_desc_t> ValueDescs;
1363 MemobjDescs.reserve(MaskedArgs.size());
1364 PtrDescs.reserve(MaskedArgs.size());
1365 ValueDescs.reserve(MaskedArgs.size());
1371 for (
size_t i = 0; i < MaskedArgs.size(); i++) {
1372 auto &NodeArg = MaskedArgs[i];
1373 switch (NodeArg.MType) {
1374 case kernel_param_kind_t::kind_pointer: {
1375 PtrDescs.push_back({
static_cast<uint32_t
>(NodeArg.MIndex), NodeArg.MPtr});
1377 case kernel_param_kind_t::kind_std_layout: {
1378 ValueDescs.push_back({
static_cast<uint32_t
>(NodeArg.MIndex),
1379 static_cast<uint32_t
>(NodeArg.MSize),
1382 case kernel_param_kind_t::kind_accessor: {
1388 switch (Req->MAccessMode) {
1405 static_cast<uint32_t
>(NodeArg.MIndex), &MemObjData,
1406 static_cast<pi_mem>(Req->MData)});
1432 auto ExecNode = MIDCache.find(Node->MID);
1433 assert(ExecNode != MIDCache.end() &&
"Node ID was not found in ID cache");
1437 ExecNode->second->updateFromOtherNode(Node);
1440 MCommandMap[ExecNode->second];
1443 Command, &UpdateDesc);
1445 if (Res != PI_SUCCESS) {
1453 : impl(
std::make_shared<detail::
graph_impl>(SyclContext, SyclDevice,
1458 : impl(
std::make_shared<detail::graph_impl>(
1459 SyclQueue.get_context(), SyclQueue.
get_device(), PropList)) {}
1462 impl->throwIfGraphRecordingQueue(
"Explicit API \"Add()\" function");
1463 std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1464 for (
auto &D : Deps) {
1469 std::shared_ptr<detail::node_impl> NodeImpl =
impl->add(
impl, DepImpls);
1470 return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1474 const std::vector<node> &Deps) {
1475 impl->throwIfGraphRecordingQueue(
"Explicit API \"Add()\" function");
1476 std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1477 for (
auto &D : Deps) {
1482 std::shared_ptr<detail::node_impl> NodeImpl =
1483 impl->add(
impl, CGF, {}, DepImpls);
1484 return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1490 std::shared_ptr<detail::node_impl> DstImpl =
1493 for (
auto &NodeImpl :
impl->MNodeStorage) {
1494 if ((NodeImpl->MSuccessors.size() == 0) && (NodeImpl != DstImpl)) {
1495 impl->makeEdge(NodeImpl, DstImpl);
1501 std::shared_ptr<detail::node_impl> SenderImpl =
1503 std::shared_ptr<detail::node_impl> ReceiverImpl =
1507 impl->makeEdge(SenderImpl, ReceiverImpl);
1516 this->
impl, this->
impl->getContext(), PropList};
1521 std::ignore = PropList;
1525 if (QueueImpl->get_context() !=
impl->getContext()) {
1527 "begin_recording called for a queue whose context "
1528 "differs from the graph context.");
1530 if (QueueImpl->get_device() !=
impl->getDevice()) {
1532 "begin_recording called for a queue whose device "
1533 "differs from the graph device.");
1536 if (QueueImpl->is_in_fusion_mode()) {
1538 "SYCL queue in kernel in fusion mode "
1539 "can NOT be recorded.");
1542 if (QueueImpl->get_context() !=
impl->getContext()) {
1544 "begin_recording called for a queue whose context "
1545 "differs from the graph context.");
1547 if (QueueImpl->get_device() !=
impl->getDevice()) {
1549 "begin_recording called for a queue whose device "
1550 "differs from the graph device.");
1553 if (QueueImpl->getCommandGraph() ==
nullptr) {
1554 QueueImpl->setCommandGraph(
impl);
1556 impl->addQueue(QueueImpl);
1558 if (QueueImpl->getCommandGraph() !=
impl) {
1560 "begin_recording called for a queue which is already "
1561 "recording to a different graph.");
1566 const std::vector<queue> &RecordingQueues,
1568 for (queue Queue : RecordingQueues) {
1575 impl->clearQueues();
1580 if (QueueImpl && QueueImpl->getCommandGraph() ==
impl) {
1581 QueueImpl->setCommandGraph(
nullptr);
1583 impl->removeQueue(QueueImpl);
1585 if (QueueImpl->getCommandGraph() !=
nullptr) {
1587 "end_recording called for a queue which is recording "
1588 "to a different graph.");
1593 const std::vector<queue> &RecordingQueues) {
1594 for (
queue Queue : RecordingQueues) {
1600 bool verbose)
const {
1602 if (path.substr(path.find_last_of(
".") + 1) ==
"dot") {
1603 impl->printGraphAsDot(path, verbose);
1607 "DOT graph is the only format supported at the moment.");
1612 return createNodesFromImpls(
impl->MNodeStorage);
1615 auto &Roots =
impl->MRoots;
1616 std::vector<std::weak_ptr<node_impl>> Impls{};
1618 std::copy(Roots.begin(), Roots.end(), std::back_inserter(Impls));
1619 return createNodesFromImpls(Impls);
1623 const std::shared_ptr<detail::graph_impl> &Graph,
const sycl::context &Ctx,
1630 impl->makePartitions();
1632 auto Device =
impl->getGraphImpl()->getDevice();
1633 for (
auto Partition :
impl->getPartitions()) {
1634 if (!Partition->isHostTask()) {
1635 impl->createCommandBuffers(Device, Partition);
1650 std::vector<std::shared_ptr<node_impl>> NodeImpls{};
1651 NodeImpls.reserve(Nodes.size());
1652 for (
auto &Node : Nodes) {
1656 impl->update(NodeImpls);
1666 impl->updateValue(NewValue, Size);
1670 const sycl::detail::AccessorBaseHost *Acc) {
1671 impl->updateAccessor(Acc);
1679 return detail::createNodesFromImpls(impl->MPredecessors);
1683 return detail::createNodesFromImpls(impl->MSuccessors);
1688 auto GraphImpl = EventImpl->getCommandGraph();
1690 return sycl::detail::createSyclObjFromImpl<node>(
1691 GraphImpl->getNodeForEvent(EventImpl));
1695 impl->updateNDRange(NDRange);
1698 impl->updateNDRange(NDRange);
1701 impl->updateNDRange(NDRange);
1703 template <>
void node::update_range<1>(
range<1> Range) {
1704 impl->updateRange(Range);
1706 template <>
void node::update_range<2>(
range<2> Range) {
1707 impl->updateRange(Range);
1709 template <>
void node::update_range<3>(
range<3> Range) {
1710 impl->updateRange(Range);
The context class represents a SYCL context on which kernel functions may be executed.
CGTYPE
Type of the command group.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Graph in the modifiable state.
void updateAccessor(const sycl::detail::AccessorBaseHost *Acc)
dynamic_parameter_base(sycl::ext::oneapi::experimental::command_graph< graph_state::modifiable > Graph, size_t ParamSize, const void *Data)
std::shared_ptr< dynamic_parameter_impl > impl
void updateValue(const void *NewValue, size_t Size)
Class representing the implementation of command_graph<executable>.
exec_graph_impl(sycl::context Context, const std::shared_ptr< graph_impl > &GraphImpl, const property_list &PropList)
Constructor.
void createCommandBuffers(sycl::device Device, std::shared_ptr< partition > &Partition)
Turns the internal graph representation into UR command-buffers for a device.
std::unique_lock< std::shared_mutex > WriteLock
void update(std::shared_ptr< graph_impl > GraphImpl)
~exec_graph_impl()
Destructor.
void makePartitions()
Partition the graph nodes and put the partition in MPartitions.
void updateImpl(std::shared_ptr< node_impl > NodeImpl)
std::shared_mutex MMutex
Protects all the fields that can be changed by class' methods.
sycl::event enqueue(const std::shared_ptr< sycl::detail::queue_impl > &Queue, sycl::detail::CG::StorageInitHelper CGData)
Called by handler::ext_oneapi_command_graph() to schedule graph for execution.
std::shared_ptr< detail::exec_graph_impl > impl
void update(const command_graph< graph_state::modifiable > &Graph)
Update the inputs & output of the graph.
executable_command_graph()=delete
An executable command-graph is not user constructable.
void finalizeImpl()
Creates a backend representation of the graph in impl member variable.
Implementation details of command_graph<modifiable>.
std::vector< std::shared_ptr< node_impl > > MNodeStorage
Storage for all nodes contained within a graph.
void removeRoot(const std::shared_ptr< node_impl > &Root)
Remove node from list of root nodes.
std::unique_lock< std::shared_mutex > WriteLock
void makeEdge(std::shared_ptr< node_impl > Src, std::shared_ptr< node_impl > Dest)
Make an edge between two nodes in the graph.
std::vector< sycl::detail::EventImplPtr > getExitNodesEvents()
Traverse the graph recursively to get the events associated with the output nodes of this graph.
void throwIfGraphRecordingQueue(const std::string ExceptionMsg) const
Throws an invalid exception if this function is called while a queue is recording commands to the graph.
std::shared_ptr< sycl::detail::event_impl > getEventForNode(std::shared_ptr< node_impl > NodeImpl) const
Find the sycl event associated with a node.
std::set< std::weak_ptr< node_impl >, std::owner_less< std::weak_ptr< node_impl > > > MRoots
List of root nodes.
bool clearQueues()
Remove all queues which are recording to this graph, also sets all queues cleared back to the executing state.
std::shared_ptr< node_impl > add(node_type NodeType, std::unique_ptr< sycl::detail::CG > CommandGroup, const std::vector< std::shared_ptr< node_impl >> &Dep={})
Create a kernel node in the graph.
void addEventForNode(std::shared_ptr< graph_impl > GraphImpl, std::shared_ptr< sycl::detail::event_impl > EventImpl, std::shared_ptr< node_impl > NodeImpl)
Associate a sycl event with a node in the graph.
std::shared_lock< std::shared_mutex > ReadLock
std::shared_ptr< detail::graph_impl > impl
command_graph< graph_state::executable > finalize(const property_list &PropList={}) const
Finalize modifiable graph into an executable graph.
void addGraphLeafDependencies(node Node)
Adds all graph leaves as dependencies.
void begin_recording(queue &RecordingQueue, const property_list &PropList={})
Change the state of a queue to be recording and associate this graph with it.
std::vector< node > get_nodes() const
Get a list of all nodes contained in this graph.
void end_recording()
Set all queues currently recording to this graph to the executing state.
void print_graph(const std::string path, bool verbose=false) const
Synchronous operation that writes a DOT formatted description of the graph to the provided path.
node addImpl(std::function< void(handler &)> CGF, const std::vector< node > &Dep)
Template-less implementation of add() for CGF nodes.
void make_edge(node &Src, node &Dest)
Add a dependency between two nodes.
modifiable_command_graph(const context &SyclContext, const device &SyclDevice, const property_list &PropList={})
Constructor.
std::vector< node > get_root_nodes() const
Get a list of all root nodes (nodes without dependencies) in this graph.
void schedule()
Add nodes to MSchedule.
std::set< std::weak_ptr< node_impl >, std::owner_less< std::weak_ptr< node_impl > > > MRoots
List of root nodes.
std::list< std::shared_ptr< node_impl > > MSchedule
Execution schedule of nodes in the graph.
Class representing a node in the graph, returned by command_graph::add().
node_type get_type() const
Get the type of command associated with this node.
static node get_node_from_event(event nodeEvent)
Get the node associated with a SYCL event returned from a queue recording submission.
std::vector< node > get_successors() const
Get a list of all nodes which depend on this node.
std::vector< node > get_predecessors() const
Get a list of all the node dependencies of this node.
Command group handler class.
Objects of the class identify a kernel in some kernel_bundle related APIs.
Provides an abstraction of a SYCL kernel.
Defines the iteration domain of both the work-groups and the overall dispatch.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
::pi_ext_sync_point PiExtSyncPoint
::pi_ext_command_buffer_command PiExtCommandBufferCommand
std::vector< bool > KernelArgMask
void ReverseRangeDimensionsForKernel(NDRDescT &NDR)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
AccessorImplHost Requirement
std::shared_ptr< device_impl > DeviceImplPtr
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative)
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, std::vector< ArgDesc > &Args, std::function< void(detail::ArgDesc &Arg, int NextTrueIndex)> Func)
pi_int32 enqueueImpCommandBufferKernel(context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const CGExecKernel &CommandGroup, std::vector< sycl::detail::pi::PiExtSyncPoint > &SyncPoints, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, sycl::detail::pi::PiExtCommandBufferCommand *OutCommand, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
node_type getNodeTypeFromCG(sycl::detail::CG::CGTYPE CGType)
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
static constexpr bool has_property()
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
@ PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT
@ PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE
pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to submit the command-buffer to queue for execution, returns an error if the command-buffer is not finalized.
pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer)
API to stop command-buffer recording such that no more commands can be appended, and makes the command-buffer ready for enqueue.
pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer)
API to decrement the reference count of the command-buffer.
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextCommandBufferUpdateKernelLaunch(pi_ext_command_buffer_command command, pi_ext_command_buffer_update_kernel_launch_desc *desc)
API to update a kernel launch command inside of a command-buffer.
@ PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC
pi_result piextCommandBufferReleaseCommand(pi_ext_command_buffer_command command)
API to decrement the reference count of a command-buffer command.
@ PI_KERNEL_ARG_MEM_OBJ_ACCESS
pi_result piextCommandBufferCreate(pi_context context, pi_device device, const pi_ext_command_buffer_desc *desc, pi_ext_command_buffer *ret_command_buffer)
API to create a command-buffer.
pi_ext_command_buffer_update_pointer_arg_desc_t * ptr_arg_list
size_t * global_work_size
uint32_t num_mem_obj_args
size_t * global_work_offset
pi_ext_command_buffer_update_memobj_arg_desc_t * mem_obj_arg_list
pi_ext_command_buffer_update_value_arg_desc_t * value_arg_list
std::vector< detail::AccessorImplPtr > MAccStorage
Storage for accessors.
std::vector< detail::EventImplPtr > MEvents
List of events that order the execution of this CG.
std::vector< AccessorImplHost * > MRequirements
List of requirements that specify which memory is needed for the command group to be executed.