39 #if __has_include(<cxxabi.h>)
40 #define __SYCL_ENABLE_GNU_DEMANGLING
47 #ifdef XPTI_ENABLE_INSTRUMENTATION
48 #include "xpti/xpti_trace_framework.hpp"
56 #ifdef XPTI_ENABLE_INSTRUMENTATION
58 extern xpti::trace_event_data_t *GSYCLGraphEvent;
61 #ifdef __SYCL_ENABLE_GNU_DEMANGLING
// NOTE(review): fragmentary listing — interior lines are elided and the
// original line numbers are fused into the text; code below is kept
// byte-identical, only comments are added.
// RAII wrapper that owns the malloc'd buffer returned by
// abi::__cxa_demangle (the destructor freeing `p` is not visible in this
// fragment — presumably it calls free(p); TODO confirm).
62 struct DemangleHandle {
64 DemangleHandle(
char *ptr) : p(ptr) {}
// Demangling helper fragment: on success (Status == 0) return the
// demangled name held by the handle, otherwise fall back to the original
// mangled `Name`.
69 DemangleHandle result(abi::__cxa_demangle(Name.c_str(), NULL, NULL, &Status));
70 return (Status == 0) ? result.p : Name;
// Fragment of a helper that invokes Func(Arg, NextTrueIndex) for each kernel
// argument, skipping arguments flagged in EliminatedArgMask. An empty mask
// means no argument was eliminated (fast path, body elided here).
91 std::vector<ArgDesc> &Args,
93 if (EliminatedArgMask.empty()) {
// Arguments are sorted by their original index so the mask can be walked
// monotonically alongside them.
101 std::sort(Args.begin(), Args.end(), [](
const ArgDesc &A,
const ArgDesc &B) {
102 return A.MIndex < B.MIndex;
// NextTrueIndex counts only the non-eliminated arguments seen so far — it
// becomes the argument's position in the transformed kernel signature.
105 size_t NextTrueIndex = 0;
// Advance NextTrueIndex over any mask entries between the previous argument
// and this one (those arguments are absent from Args).
110 for (
int Idx = LastIndex + 1; Idx < Arg.
MIndex; ++Idx)
111 if (!EliminatedArgMask[Idx])
// Eliminated arguments are skipped entirely; surviving ones are forwarded
// with their compacted index.
115 if (EliminatedArgMask[Arg.
MIndex])
118 Func(Arg, NextTrueIndex);
// XPTI-only helpers (compiled out when instrumentation is disabled).
124 #ifdef XPTI_ENABLE_INSTRUMENTATION
// Produces a stable integer ID for a SYCL device by reinterpreting its
// native backend handle — used as trace metadata, never dereferenced.
125 static size_t deviceToID(
const device &Device) {
129 return reinterpret_cast<size_t>(
getSyclObjImpl(Device)->getHandleRef());
// Fragment of an access::mode -> string mapping used for trace labels
// (returns for read/read_write are elided in this view).
135 case access::mode::read:
139 case access::mode::read_write:
141 case access::mode::discard_write:
142 return "discard_write";
143 case access::mode::discard_read_write:
144 return "discard_read_write";
// Maps a scheduler command type to the XPTI graph-node type string. The
// function signature (fused lines 151-154) is elided from this view, so the
// exact name/linkage cannot be confirmed here; the case bodies below are
// kept byte-identical.
150 #ifdef XPTI_ENABLE_INSTRUMENTATION
155 case Command::CommandType::RUN_CG:
156 return "command_group_node";
// COPY_MEMORY, MAP and UNMAP all report as plain memory transfers.
157 case Command::CommandType::COPY_MEMORY:
158 return "memory_transfer_node";
159 case Command::CommandType::ALLOCA:
160 return "memory_allocation_node";
161 case Command::CommandType::ALLOCA_SUB_BUF:
162 return "sub_buffer_creation_node";
163 case Command::CommandType::RELEASE:
164 return "memory_deallocation_node";
165 case Command::CommandType::MAP_MEM_OBJ:
166 return "memory_transfer_node";
167 case Command::CommandType::UNMAP_MEM_OBJ:
168 return "memory_transfer_node";
// UPDATE_REQUIREMENT / EMPTY_TASK correspond to host-accessor buffer
// lock acquisition and release respectively.
169 case Command::CommandType::UPDATE_REQUIREMENT:
170 return "host_acc_create_buffer_lock_node";
171 case Command::CommandType::EMPTY_TASK:
172 return "host_acc_destroy_buffer_release_node";
// Fallback for command types not covered above.
174 return "unknown_node";
181 static std::string commandToName(Command::CommandType Type) {
183 case Command::CommandType::RUN_CG:
184 return "Command Group Action";
185 case Command::CommandType::COPY_MEMORY:
186 return "Memory Transfer (Copy)";
187 case Command::CommandType::ALLOCA:
188 return "Memory Allocation";
189 case Command::CommandType::ALLOCA_SUB_BUF:
190 return "Sub Buffer Creation";
191 case Command::CommandType::RELEASE:
192 return "Memory Deallocation";
193 case Command::CommandType::MAP_MEM_OBJ:
194 return "Memory Transfer (Map)";
195 case Command::CommandType::UNMAP_MEM_OBJ:
196 return "Memory Transfer (Unmap)";
197 case Command::CommandType::UPDATE_REQUIREMENT:
198 return "Host Accessor Creation/Buffer Lock";
199 case Command::CommandType::EMPTY_TASK:
200 return "Host Accessor Destruction/Buffer Lock Release";
202 return "Unknown Action";
// Collects the native PI event handles from a list of event impls, skipping
// events that have no backend handle (host events / not-yet-enqueued). The
// parameter list (fused line 208) is elided from this view.
207 static std::vector<RT::PiEvent>
209 std::vector<RT::PiEvent> RetPiEvents;
210 for (
auto &EventImpl : EventImpls) {
// Only events backed by a real PI handle are forwarded to the backend.
211 if (EventImpl->getHandleRef() !=
nullptr)
212 RetPiEvents.push_back(EventImpl->getHandleRef());
// Separate fragment: flushes each dependency event's queue if needed so the
// events can make progress before being waited on (enclosing signature
// elided — presumably a cross-queue flush helper; TODO confirm).
220 for (
auto &EventImpl : EventImpls) {
221 EventImpl->flushIfNeeded(Queue);
227 std::vector<interop_handle::ReqToMem> MReqToMem;
230 std::map<const detail::plugin *, std::vector<EventImplPtr>>
231 RequiredEventsPerPlugin;
235 RequiredEventsPerPlugin[&Plugin].push_back(Event);
243 for (
auto &PluginWithEvents : RequiredEventsPerPlugin) {
244 std::vector<RT::PiEvent> RawEvents =
getPiEvents(PluginWithEvents.second);
250 HostTask.MQueue->reportAsyncException(std::current_exception());
254 HostTask.MQueue->reportAsyncException(std::current_exception());
262 Event->waitInternal();
// Fragment of the host-task dispatch functor: stores the owning command and
// the requirement->memory mapping needed to build an interop_handle.
270 std::vector<interop_handle::ReqToMem> ReqToMem)
271 : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {}
// The functor is only ever created for host-task command groups.
274 assert(MThisCmd->
getCG().
getType() == CG::CGTYPE::CodeplayHostTask);
// If waiting for dependencies failed, report it as an async exception on the
// host task's queue instead of propagating out of the dispatch thread.
280 std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error(
281 std::string(
"Couldn't wait for host-task's dependencies"),
283 HostTask.MQueue->reportAsyncException(EPtr);
// Interop host tasks receive an interop_handle built from the queue's
// device/context and the captured requirement-to-memory mapping.
292 if (
HostTask.MHostTask->isInteropTask()) {
294 HostTask.MQueue->getDeviceImplPtr(),
295 HostTask.MQueue->getContextImplPtr()};
// Exceptions thrown by the user's host-task callable are reported
// asynchronously on the queue.
301 HostTask.MQueue->reportAsyncException(std::current_exception());
308 assert(EmptyCmd &&
"No empty command found");
// Completion bookkeeping: snapshot the dependency list, mark the command's
// event complete, then unblock the scheduler leaves that were waiting on
// each dependent requirement.
316 std::vector<Command *> ToCleanUp;
317 Scheduler &Sched = Scheduler::getInstance();
321 std::vector<DepDesc> Deps = MThisCmd->
MDeps;
324 MThisCmd->
MEvent->setComplete();
328 for (
const DepDesc &Dep : Deps)
329 Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, ToCleanUp);
335 void Command::waitForPreparedHostEvents()
const {
336 for (
const EventImplPtr &HostEvent : MPreparedHostDepsEvents)
337 HostEvent->waitInternal();
341 std::vector<EventImplPtr> &EventImpls,
344 if (!EventImpls.empty()) {
345 if (Queue->is_host()) {
360 std::map<context_impl *, std::vector<EventImplPtr>>
361 RequiredEventsPerContext;
365 assert(Context.get() &&
366 "Only non-host events are expected to be waited for here");
367 RequiredEventsPerContext[Context.get()].push_back(Event);
370 for (
auto &CtxWithEvents : RequiredEventsPerContext) {
371 std::vector<RT::PiEvent> RawEvents =
getPiEvents(CtxWithEvents.second);
373 RawEvents.size(), RawEvents.data());
378 assert(Event->getContextImpl().
get() &&
379 "Only non-host events are expected to be waited for here");
382 std::vector<RT::PiEvent> RawEvents =
getPiEvents(EventImpls);
386 Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event);
395 : MQueue(
std::move(Queue)),
397 MPreparedDepsEvents(MEvent->getPreparedDepsEvents()),
398 MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents()),
405 #ifdef XPTI_ENABLE_INSTRUMENTATION
406 if (!xptiTraceEnabled())
414 #ifdef XPTI_ENABLE_INSTRUMENTATION
430 Command *Cmd,
void *ObjAddr,
bool IsCommand,
431 std::optional<access::mode> AccMode) {
432 #ifdef XPTI_ENABLE_INSTRUMENTATION
440 xpti::utils::StringHelper SH;
441 std::string AddressStr = SH.addressAsString<
void *>(ObjAddr);
443 std::string TypeString = SH.nameWithAddressString(Prefix, AddressStr);
446 xpti::payload_t Payload(TypeString.c_str(),
MAddress);
447 uint64_t EdgeInstanceNo;
449 xptiMakeEvent(TypeString.c_str(), &Payload, xpti::trace_graph_event,
450 xpti_at::active, &EdgeInstanceNo);
452 xpti_td *SrcEvent =
static_cast<xpti_td *
>(Cmd->
MTraceEvent);
453 xpti_td *TgtEvent =
static_cast<xpti_td *
>(
MTraceEvent);
454 EdgeEvent->source_id = SrcEvent->unique_id;
455 EdgeEvent->target_id = TgtEvent->unique_id;
457 xpti::addMetadata(EdgeEvent,
"access_mode",
458 static_cast<int>(AccMode.value()));
459 xpti::addMetadata(EdgeEvent,
"memory_object",
460 reinterpret_cast<size_t>(ObjAddr));
462 xpti::addMetadata(EdgeEvent,
"event",
reinterpret_cast<size_t>(ObjAddr));
464 xptiNotifySubscribers(
MStreamID, xpti::trace_edge_create,
465 detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo,
480 #ifdef XPTI_ENABLE_INSTRUMENTATION
493 xpti::utils::StringHelper SH;
494 std::string AddressStr = SH.addressAsString<
RT::PiEvent>(PiEventAddr);
499 std::string NodeName = SH.nameWithAddressString(
"virtual_node", AddressStr);
501 xpti::payload_t VNPayload(NodeName.c_str(),
MAddress);
502 uint64_t VNodeInstanceNo;
504 xptiMakeEvent(NodeName.c_str(), &VNPayload, xpti::trace_graph_event,
505 xpti_at::active, &VNodeInstanceNo);
507 xpti::addMetadata(NodeEvent,
"kernel_name", NodeName);
508 xptiNotifySubscribers(
MStreamID, xpti::trace_node_create,
509 detail::GSYCLGraphEvent, NodeEvent, VNodeInstanceNo,
512 std::string EdgeName = SH.nameWithAddressString(
"Event", AddressStr);
513 xpti::payload_t EdgePayload(EdgeName.c_str(),
MAddress);
514 uint64_t EdgeInstanceNo;
516 xptiMakeEvent(EdgeName.c_str(), &EdgePayload, xpti::trace_graph_event,
517 xpti_at::active, &EdgeInstanceNo);
518 if (EdgeEvent && NodeEvent) {
521 xpti_td *TgtEvent =
static_cast<xpti_td *
>(
MTraceEvent);
522 EdgeEvent->source_id = NodeEvent->unique_id;
523 EdgeEvent->target_id = TgtEvent->unique_id;
524 xpti::addMetadata(EdgeEvent,
"event",
525 reinterpret_cast<size_t>(PiEventAddr));
526 xptiNotifySubscribers(
MStreamID, xpti::trace_edge_create,
527 detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo,
536 uint64_t CommandInstanceNo = 0;
537 #ifdef XPTI_ENABLE_INSTRUMENTATION
538 if (!xptiTraceEnabled())
539 return CommandInstanceNo;
545 xpti::utils::StringHelper SH;
547 std::string CommandString =
550 xpti::payload_t p(CommandString.c_str(),
MAddress);
551 xpti_td *CmdTraceEvent =
552 xptiMakeEvent(CommandString.c_str(), &p, xpti::trace_graph_event,
553 xpti_at::active, &CommandInstanceNo);
564 return CommandInstanceNo;
568 #ifdef XPTI_ENABLE_INSTRUMENTATION
572 xptiNotifySubscribers(
MStreamID, xpti::trace_node_create,
573 detail::GSYCLGraphEvent,
580 std::vector<Command *> &ToCleanUp) {
582 const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr();
588 bool PiEventExpected =
589 !DepEvent->is_host() ||
getType() == CommandType::HOST_TASK;
590 if (
auto *DepCmd =
static_cast<Command *
>(DepEvent->getCommand()))
591 PiEventExpected &= DepCmd->producesPiEvent();
593 if (!PiEventExpected) {
600 Command *ConnectionCmd =
nullptr;
605 getType() != CommandType::HOST_TASK)
610 if (DepEventContext != WorkerContext && !WorkerContext->is_host()) {
616 return ConnectionCmd;
620 return MQueue->getContextImplPtr();
633 Command *ConnectionCmd =
nullptr;
642 if (!ConnectionCmd) {
643 MDeps.push_back(NewDep);
648 #ifdef XPTI_ENABLE_INSTRUMENTATION
654 return ConnectionCmd;
658 std::vector<Command *> &ToCleanUp) {
659 #ifdef XPTI_ENABLE_INSTRUMENTATION
673 #ifdef XPTI_ENABLE_INSTRUMENTATION
674 if (!(xptiTraceEnabled() &&
MTraceEvent && PiEventAddr))
678 xptiNotifySubscribers(
MStreamID, xpti::trace_signal, detail::GSYCLGraphEvent,
680 (
void *)PiEventAddr);
685 #ifdef XPTI_ENABLE_INSTRUMENTATION
689 xptiNotifySubscribers(
MStreamID, Type, detail::GSYCLGraphEvent,
691 static_cast<const void *
>(Txt));
696 std::vector<Command *> &ToCleanUp) {
708 static bool ThrowOnBlock = getenv(
"SYCL_THROW_ON_BLOCK") !=
nullptr;
710 throw sycl::runtime_error(
711 std::string(
"Waiting for blocked command. Block reason: ") +
715 #ifdef XPTI_ENABLE_INSTRUMENTATION
719 std::string Info =
"enqueue.barrier[";
727 #ifdef XPTI_ENABLE_INSTRUMENTATION
738 #ifdef XPTI_ENABLE_INSTRUMENTATION
754 if (CL_SUCCESS != Res)
759 (
MEvent->is_host() ||
MEvent->getHandleRef() ==
nullptr))
769 ToCleanUp.push_back(
this);
775 #ifdef XPTI_ENABLE_INSTRUMENTATION
782 #ifdef XPTI_ENABLE_INSTRUMENTATION
783 assert(
MType == CommandType::RELEASE &&
"Expected release command");
789 xpti_td *TgtTraceEvent =
static_cast<xpti_td *
>(
MTraceEvent);
794 for (
auto &Item : DepList) {
795 if (Item->MTraceEvent && Item->MAddress ==
MAddress) {
796 xpti::utils::StringHelper SH;
797 std::string AddressStr = SH.addressAsString<
void *>(
MAddress);
798 std::string TypeString =
799 "Edge:" + SH.nameWithAddressString(commandToName(
MType), AddressStr);
803 xpti::payload_t p(TypeString.c_str(),
MAddress);
804 uint64_t EdgeInstanceNo;
806 xptiMakeEvent(TypeString.c_str(), &p, xpti::trace_graph_event,
807 xpti_at::active, &EdgeInstanceNo);
809 xpti_td *SrcTraceEvent =
static_cast<xpti_td *
>(Item->MTraceEvent);
810 EdgeEvent->target_id = TgtTraceEvent->unique_id;
811 EdgeEvent->source_id = SrcTraceEvent->unique_id;
812 xpti::addMetadata(EdgeEvent,
"memory_object",
813 reinterpret_cast<size_t>(
MAddress));
814 xptiNotifySubscribers(
MStreamID, xpti::trace_edge_create,
815 detail::GSYCLGraphEvent, EdgeEvent,
816 EdgeInstanceNo,
nullptr);
826 return "A Buffer is locked by the host accessor";
828 return "Blocked by host task";
831 return "Unknown block reason";
837 :
Command(Type, Queue), MLinkedAllocaCmd(LinkedAllocaCmd),
838 MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MRequirement(
std::move(Req)),
839 MReleaseCmd(Queue, this) {
845 #ifdef XPTI_ENABLE_INSTRUMENTATION
846 if (!xptiTraceEnabled())
855 xpti::addMetadata(TE,
"sycl_device", deviceToID(
MQueue->get_device()));
856 xpti::addMetadata(TE,
"sycl_device_type",
858 xpti::addMetadata(TE,
"sycl_device_name",
860 xpti::addMetadata(TE,
"memory_object",
reinterpret_cast<size_t>(
MAddress));
870 bool InitFromUserData,
874 MInitFromUserData(InitFromUserData) {
879 std::vector<Command *> ToCleanUp;
882 assert(ConnectionCmd ==
nullptr);
883 assert(ToCleanUp.empty());
888 #ifdef XPTI_ENABLE_INSTRUMENTATION
889 if (!xptiTraceEnabled())
899 cl_int AllocaCommand::enqueueImp() {
905 void *HostPtr =
nullptr;
920 std::move(EventImpls), Event);
926 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#FFD28A\", label=\"";
928 Stream <<
"ID = " <<
this <<
"\\n";
932 Stream <<
"\"];" << std::endl;
935 for (
const auto &Dep :
MDeps) {
936 if (Dep.MDepCommand ==
nullptr)
938 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
939 <<
" [ label = \"Access mode: "
941 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
948 std::vector<Command *> &ToEnqueue,
949 std::vector<Command *> &ToCleanUp)
953 MParentAlloca(ParentAlloca) {
961 ToEnqueue.push_back(ConnectionCmd);
965 #ifdef XPTI_ENABLE_INSTRUMENTATION
966 if (!xptiTraceEnabled())
974 xpti::addMetadata(TE,
"access_range_start",
976 xpti::addMetadata(TE,
"access_range_end",
988 return static_cast<void *
>(
995 cl_int AllocaSubBufCommand::enqueueImp() {
1011 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#FFD28A\", label=\"";
1013 Stream <<
"ID = " <<
this <<
"\\n";
1019 Stream <<
"\"];" << std::endl;
1021 for (
const auto &Dep :
MDeps) {
1022 if (Dep.MDepCommand ==
nullptr)
1024 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1025 <<
" [ label = \"Access mode: "
1027 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1038 #ifdef XPTI_ENABLE_INSTRUMENTATION
1039 if (!xptiTraceEnabled())
1047 xpti_td *TE =
static_cast<xpti_td *
>(
MTraceEvent);
1048 xpti::addMetadata(TE,
"sycl_device", deviceToID(
MQueue->get_device()));
1049 xpti::addMetadata(TE,
"sycl_device_type",
1051 xpti::addMetadata(TE,
"sycl_device_name",
1053 xpti::addMetadata(TE,
"allocation_type",
1054 commandToName(MAllocaCmd->
getType()));
1060 cl_int ReleaseCommand::enqueueImp() {
1063 std::vector<RT::PiEvent> RawEvents =
getPiEvents(EventImpls);
1064 bool SkipRelease =
false;
1070 const bool CurAllocaIsHost = MAllocaCmd->
getQueue()->is_host();
1071 bool NeedUnmap =
false;
1085 NeedUnmap |= CurAllocaIsHost == MAllocaCmd->
MIsActive;
1094 UnmapEventImpl->setContextImpl(Queue->getContextImplPtr());
1095 RT::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef();
1097 void *Src = CurAllocaIsHost
1101 void *Dst = !CurAllocaIsHost
1106 RawEvents, UnmapEvent);
1110 EventImpls.push_back(UnmapEventImpl);
1124 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#FF827A\", label=\"";
1126 Stream <<
"ID = " <<
this <<
" ; ";
1128 Stream <<
" Alloca : " << MAllocaCmd <<
"\\n";
1129 Stream <<
" MemObj : " << MAllocaCmd->
getSYCLMemObj() <<
"\\n";
1130 Stream <<
"\"];" << std::endl;
1132 for (
const auto &Dep :
MDeps) {
1133 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1134 <<
" [ label = \"Access mode: "
1136 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1149 MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(
std::move(Req)), MDstPtr(DstPtr),
1155 #ifdef XPTI_ENABLE_INSTRUMENTATION
1156 if (!xptiTraceEnabled())
1164 xpti_td *TE =
static_cast<xpti_td *
>(
MTraceEvent);
1165 xpti::addMetadata(TE,
"sycl_device", deviceToID(
MQueue->get_device()));
1166 xpti::addMetadata(TE,
"sycl_device_type",
1168 xpti::addMetadata(TE,
"sycl_device_name",
1170 xpti::addMetadata(TE,
"memory_object",
reinterpret_cast<size_t>(
MAddress));
1176 cl_int MapMemObject::enqueueImp() {
1179 std::vector<RT::PiEvent> RawEvents =
getPiEvents(EventImpls);
1192 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#77AFFF\", label=\"";
1194 Stream <<
"ID = " <<
this <<
" ; ";
1197 Stream <<
"\"];" << std::endl;
1199 for (
const auto &Dep :
MDeps) {
1200 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1201 <<
" [ label = \"Access mode: "
1203 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1211 MDstAllocaCmd(DstAllocaCmd), MDstReq(
std::move(Req)), MSrcPtr(SrcPtr) {
1216 #ifdef XPTI_ENABLE_INSTRUMENTATION
1217 if (!xptiTraceEnabled())
1225 xpti_td *TE =
static_cast<xpti_td *
>(
MTraceEvent);
1226 xpti::addMetadata(TE,
"sycl_device", deviceToID(
MQueue->get_device()));
1227 xpti::addMetadata(TE,
"sycl_device_type",
1229 xpti::addMetadata(TE,
"sycl_device_name",
1231 xpti::addMetadata(TE,
"memory_object",
reinterpret_cast<size_t>(
MAddress));
1254 MEvent->getHandleRef() !=
nullptr;
1257 cl_int UnMapMemObject::enqueueImp() {
1260 std::vector<RT::PiEvent> RawEvents =
getPiEvents(EventImpls);
1266 std::move(RawEvents), Event);
1272 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#EBC40F\", label=\"";
1274 Stream <<
"ID = " <<
this <<
" ; ";
1277 Stream <<
"\"];" << std::endl;
1279 for (
const auto &Dep :
MDeps) {
1280 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1281 <<
" [ label = \"Access mode: "
1283 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1294 MSrcQueue(SrcQueue), MSrcReq(
std::move(SrcReq)),
1295 MSrcAllocaCmd(SrcAllocaCmd), MDstReq(
std::move(DstReq)),
1296 MDstAllocaCmd(DstAllocaCmd) {
1297 if (!MSrcQueue->is_host())
1298 MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
1304 #ifdef XPTI_ENABLE_INSTRUMENTATION
1305 if (!xptiTraceEnabled())
1313 xpti_td *CmdTraceEvent =
static_cast<xpti_td *
>(
MTraceEvent);
1314 xpti::addMetadata(CmdTraceEvent,
"sycl_device",
1315 deviceToID(
MQueue->get_device()));
1316 xpti::addMetadata(CmdTraceEvent,
"sycl_device_type",
1318 xpti::addMetadata(CmdTraceEvent,
"sycl_device_name",
1320 xpti::addMetadata(CmdTraceEvent,
"memory_object",
1321 reinterpret_cast<size_t>(
MAddress));
1322 xpti::addMetadata(CmdTraceEvent,
"copy_from",
1323 reinterpret_cast<size_t>(
1326 CmdTraceEvent,
"copy_to",
1357 return MQueue->is_host() ||
1359 MEvent->getHandleRef() !=
nullptr;
1362 cl_int MemCpyCommand::enqueueImp() {
1382 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#C7EB15\" label=\"";
1384 Stream <<
"ID = " <<
this <<
" ; ";
1386 Stream <<
"From: " << MSrcAllocaCmd <<
" is host: " << MSrcQueue->is_host()
1388 Stream <<
"To: " << MDstAllocaCmd <<
" is host: " <<
MQueue->is_host()
1391 Stream <<
"\"];" << std::endl;
1393 for (
const auto &Dep :
MDeps) {
1394 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1395 <<
" [ label = \"Access mode: "
1397 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1404 if (Dep.MDepRequirement == Req)
1405 return Dep.MAllocaCmd;
1412 return ((
CGExecKernel *)MCommandGroup.get())->getStreams();
1416 std::vector<std::shared_ptr<const void>>
1419 return ((
CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources();
1433 cl_int UpdateHostRequirementCommand::enqueueImp() {
1439 assert(MSrcAllocaCmd &&
"Expected valid alloca command");
1440 assert(MSrcAllocaCmd->
getMemAllocation() &&
"Expected valid source pointer");
1441 assert(MDstPtr &&
"Expected valid target pointer");
1448 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#f1337f\", label=\"";
1450 Stream <<
"ID = " <<
this <<
"\\n";
1452 bool IsReqOnBuffer =
1454 Stream <<
"TYPE: " << (IsReqOnBuffer ?
"Buffer" :
"Image") <<
"\\n";
1456 Stream <<
"Is sub buffer: " << std::boolalpha << MDstReq.
MIsSubBuffer
1459 Stream <<
"\"];" << std::endl;
1461 for (
const auto &Dep :
MDeps) {
1462 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1463 <<
" [ label = \"Access mode: "
1465 <<
"MemObj: " << Dep.MAllocaCmd->getSYCLMemObj() <<
" \" ]"
1476 MSrcQueue(SrcQueue), MSrcReq(
std::move(SrcReq)),
1477 MSrcAllocaCmd(SrcAllocaCmd), MDstReq(
std::move(DstReq)), MDstPtr(DstPtr) {
1478 if (!MSrcQueue->is_host())
1479 MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
1485 #ifdef XPTI_ENABLE_INSTRUMENTATION
1486 if (!xptiTraceEnabled())
1494 xpti_td *CmdTraceEvent =
static_cast<xpti_td *
>(
MTraceEvent);
1495 xpti::addMetadata(CmdTraceEvent,
"sycl_device",
1496 deviceToID(
MQueue->get_device()));
1497 xpti::addMetadata(CmdTraceEvent,
"sycl_device_type",
1499 xpti::addMetadata(CmdTraceEvent,
"sycl_device_name",
1501 xpti::addMetadata(CmdTraceEvent,
"memory_object",
1502 reinterpret_cast<size_t>(
MAddress));
1503 xpti::addMetadata(CmdTraceEvent,
"copy_from",
1504 reinterpret_cast<size_t>(
1507 CmdTraceEvent,
"copy_to",
1522 cl_int MemCpyCommandHost::enqueueImp() {
1526 std::vector<RT::PiEvent> RawEvents =
getPiEvents(EventImpls);
1545 MDstReq.
MElemSize, std::move(RawEvents), Event);
1555 cl_int EmptyCommand::enqueueImp() {
1565 MRequirements.emplace_back(ReqRef);
1566 const Requirement *
const StoredReq = &MRequirements.back();
1569 std::vector<Command *> ToCleanUp;
1571 assert(Cmd ==
nullptr &&
"Conection command should be null for EmptyCommand");
1572 assert(ToCleanUp.empty() &&
"addDep should add a command for cleanup only if "
1573 "there's a connection command");
1578 #ifdef XPTI_ENABLE_INSTRUMENTATION
1579 if (!xptiTraceEnabled())
1583 if (MRequirements.empty())
1592 xpti_td *CmdTraceEvent =
static_cast<xpti_td *
>(
MTraceEvent);
1593 xpti::addMetadata(CmdTraceEvent,
"sycl_device",
1594 deviceToID(
MQueue->get_device()));
1595 xpti::addMetadata(CmdTraceEvent,
"sycl_device_type",
1597 xpti::addMetadata(CmdTraceEvent,
"sycl_device_name",
1599 xpti::addMetadata(CmdTraceEvent,
"memory_object",
1600 reinterpret_cast<size_t>(
MAddress));
1607 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#8d8f29\", label=\"";
1609 Stream <<
"ID = " <<
this <<
"\\n";
1610 Stream <<
"EMPTY NODE"
1613 Stream <<
"\"];" << std::endl;
1615 for (
const auto &Dep :
MDeps) {
1616 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1617 <<
" [ label = \"Access mode: "
1619 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1627 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#B6A2EB\", label=\"";
1629 Stream <<
"ID = " <<
this <<
"\\n";
1632 Stream <<
"\"];" << std::endl;
1634 for (
const auto &Dep :
MDeps) {
1635 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1636 <<
" [ label = \"Access mode: "
1638 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1647 MSrcAllocaCmd(SrcAllocaCmd), MDstReq(
std::move(Req)), MDstPtr(DstPtr) {
1653 #ifdef XPTI_ENABLE_INSTRUMENTATION
1654 if (!xptiTraceEnabled())
1662 xpti_td *CmdTraceEvent =
static_cast<xpti_td *
>(
MTraceEvent);
1663 xpti::addMetadata(CmdTraceEvent,
"sycl_device",
1664 deviceToID(
MQueue->get_device()));
1665 xpti::addMetadata(CmdTraceEvent,
"sycl_device_type",
1667 xpti::addMetadata(CmdTraceEvent,
"sycl_device_name",
1669 xpti::addMetadata(CmdTraceEvent,
"memory_object",
1670 reinterpret_cast<size_t>(
MAddress));
1682 return "update_host";
1688 return "copy acc to acc";
1691 return "copy acc to ptr";
1694 return "copy ptr to acc";
1703 return "prefetch usm";
1717 MCommandGroup(
std::move(CommandGroup)) {
1721 MEvent->setNeedsCleanupAfterWait(
true);
1722 }
else if (MCommandGroup->getType() == CG::CGTYPE::Kernel &&
1723 (
static_cast<CGExecKernel *
>(MCommandGroup.get())->hasStreams() ||
1725 ->hasAuxiliaryResources()))
1726 MEvent->setNeedsCleanupAfterWait(
true);
1732 #ifdef XPTI_ENABLE_INSTRUMENTATION
1733 if (!xptiTraceEnabled())
1737 bool HasSourceInfo =
false;
1738 std::string KernelName;
1739 std::optional<bool> FromSource;
1740 switch (MCommandGroup->getType()) {
1745 if (KernelCG->MSyclKernel && KernelCG->MSyclKernel->isCreatedFromSource()) {
1747 pi_kernel KernelHandle = KernelCG->MSyclKernel->getHandleRef();
1749 KernelName = MCommandGroup->MFunctionName;
1766 xpti::payload_t Payload;
1767 if (!MCommandGroup->MFileName.empty()) {
1770 xpti::payload_t(KernelName.c_str(), MCommandGroup->MFileName.c_str(),
1771 MCommandGroup->MLine, MCommandGroup->MColumn,
MAddress);
1772 HasSourceInfo =
true;
1775 Payload = xpti::payload_t(KernelName.c_str(),
MAddress);
1779 Payload = xpti::payload_t(KernelName.c_str());
1782 uint64_t CGKernelInstanceNo;
1784 xpti_td *CmdTraceEvent =
1785 xptiMakeEvent(
"ExecCG", &Payload, xpti::trace_graph_event,
1786 xpti::trace_activity_type_t::active, &CGKernelInstanceNo);
1788 if (CmdTraceEvent) {
1794 if (CGKernelInstanceNo > 1)
1797 xpti::addMetadata(CmdTraceEvent,
"sycl_device",
1798 deviceToID(
MQueue->get_device()));
1799 xpti::addMetadata(CmdTraceEvent,
"sycl_device_type",
1801 xpti::addMetadata(CmdTraceEvent,
"sycl_device_name",
1803 if (!KernelName.empty()) {
1804 xpti::addMetadata(CmdTraceEvent,
"kernel_name", KernelName);
1806 if (FromSource.has_value()) {
1807 xpti::addMetadata(CmdTraceEvent,
"from_source", FromSource.value());
1809 if (HasSourceInfo) {
1810 xpti::addMetadata(CmdTraceEvent,
"sym_function_name", KernelName);
1811 xpti::addMetadata(CmdTraceEvent,
"sym_source_file_name",
1812 MCommandGroup->MFileName);
1813 xpti::addMetadata(CmdTraceEvent,
"sym_line_no", MCommandGroup->MLine);
1814 xpti::addMetadata(CmdTraceEvent,
"sym_column_no", MCommandGroup->MColumn);
1820 auto &NDRDesc = KernelCG->
MNDRDesc;
1821 std::vector<ArgDesc> Args;
1828 std::mutex *KernelMutex =
nullptr;
1830 std::shared_ptr<kernel_impl> SyclKernelImpl;
1831 std::shared_ptr<device_image_impl> DeviceImageImpl;
1842 KernelCG->MKernelName);
1847 ->get_program_ref();
1848 }
else if (
nullptr != KernelCG->MSyclKernel) {
1850 KernelCG->MSyclKernel->get_info<info::kernel::program>());
1851 Program = SyclProg->getHandleRef();
1853 std::tie(Kernel, KernelMutex, Program) =
1855 KernelCG->MOSModuleHandle,
MQueue->getContextImplPtr(),
1856 MQueue->getDeviceImplPtr(), KernelCG->MKernelName,
nullptr);
1860 if (
nullptr == KernelCG->MSyclKernel ||
1861 !KernelCG->MSyclKernel->isCreatedFromSource()) {
1864 KernelCG->MOSModuleHandle, Program, KernelCG->MKernelName);
1869 xpti::offload_kernel_enqueue_data_t KernelData{
1870 {NDRDesc.GlobalSize[0], NDRDesc.GlobalSize[1], NDRDesc.GlobalSize[2]},
1871 {NDRDesc.LocalSize[0], NDRDesc.LocalSize[1], NDRDesc.LocalSize[2]},
1872 {NDRDesc.GlobalOffset[0], NDRDesc.GlobalOffset[1],
1873 NDRDesc.GlobalOffset[2]},
1875 xpti::addMetadata(CmdTraceEvent,
"enqueue_kernel_data", KernelData);
1876 for (
size_t i = 0; i < Args.size(); i++) {
1877 std::string Prefix(
"arg");
1878 xpti::offload_kernel_arg_data_t arg{(int)Args[i].
MType, Args[i].MPtr,
1879 Args[i].MSize, Args[i].MIndex};
1880 xpti::addMetadata(CmdTraceEvent, Prefix + std::to_string(i), arg);
1884 xptiNotifySubscribers(
MStreamID, xpti::trace_node_create,
1885 detail::GSYCLGraphEvent, CmdTraceEvent,
1893 Stream <<
"\"" <<
this <<
"\" [style=filled, fillcolor=\"#AFFF82\", label=\"";
1895 Stream <<
"ID = " <<
this <<
"\\n";
1898 switch (MCommandGroup->getType()) {
1902 Stream <<
"Kernel name: ";
1903 if (KernelCG->MSyclKernel && KernelCG->MSyclKernel->isCreatedFromSource())
1904 Stream <<
"created from source";
1911 Stream <<
"CG type: " <<
cgTypeToString(MCommandGroup->getType()) <<
"\\n";
1915 Stream <<
"\"];" << std::endl;
1917 for (
const auto &Dep :
MDeps) {
1918 Stream <<
" \"" <<
this <<
"\" -> \"" << Dep.MDepCommand <<
"\""
1919 <<
" [ label = \"Access mode: "
1921 <<
"MemObj: " << Dep.MDepRequirement->MSYCLMemObj <<
" \" ]"
1950 if (WGSize[0] == 0) {
1975 const std::shared_ptr<device_image_impl> &DeviceImageImpl,
1979 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc) {
1982 auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc,
1984 switch (Arg.MType) {
1989 assert(getMemAllocationFunc !=
nullptr &&
1990 "The function should not be nullptr as we followed the path for "
1991 "which accessors are used");
2010 ->getOrCreateSampler(Queue->get_context());
2017 Arg.MSize, Arg.MPtr);
2021 if (Queue->is_host()) {
2022 throw cl::sycl::feature_not_supported(
2023 "SYCL2020 specialization constants are not yet supported on host "
2027 assert(DeviceImageImpl !=
nullptr);
2028 RT::PiMem SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref();
2031 SpecConstsBuffer ? &SpecConstsBuffer :
nullptr;
2033 SpecConstsBufferArg);
2047 const bool HasLocalSize = (NDRDesc.
LocalSize[0] != 0);
2051 size_t RequiredWGSize[3] = {0, 0, 0};
2052 size_t *LocalSize =
nullptr;
2058 Kernel, Queue->getDeviceImplPtr()->getHandleRef(),
2060 RequiredWGSize,
nullptr);
2062 const bool EnforcedLocalSize =
2063 (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
2064 RequiredWGSize[2] != 0);
2065 if (EnforcedLocalSize)
2066 LocalSize = RequiredWGSize;
2072 RawEvents.empty() ? nullptr : &RawEvents[0], OutEvent);
2080 void **CastedBlob = (
void **)Blob;
2082 std::vector<Requirement *> *Reqs =
2083 static_cast<std::vector<Requirement *> *
>(CastedBlob[0]);
2085 std::unique_ptr<HostKernelBase> *
HostKernel =
2086 static_cast<std::unique_ptr<HostKernelBase> *
>(CastedBlob[1]);
2091 void **NextArg = CastedBlob + 3;
2093 Req->
MData = *(NextArg++);
2095 (*HostKernel)->call(*NDRDesc,
nullptr);
2106 const std::shared_ptr<detail::kernel_impl> &MSyclKernel,
2108 std::vector<RT::PiEvent> &RawEvents,
RT::PiEvent *OutEvent,
2109 const std::function<
void *(
Requirement *Req)> &getMemAllocationFunc) {
2112 auto ContextImpl = Queue->getContextImplPtr();
2113 auto DeviceImpl = Queue->getDeviceImplPtr();
2115 std::mutex *KernelMutex =
nullptr;
2118 std::shared_ptr<kernel_impl> SyclKernelImpl;
2119 std::shared_ptr<device_image_impl> DeviceImageImpl;
2134 Kernel = SyclKernelImpl->getHandleRef();
2135 DeviceImageImpl = SyclKernelImpl->getDeviceImage();
2137 Program = DeviceImageImpl->get_program_ref();
2143 }
else if (
nullptr != MSyclKernel) {
2145 Queue->get_context());
2146 Kernel = MSyclKernel->getHandleRef();
2150 Program = SyclProg->getHandleRef();
2151 if (SyclProg->is_cacheable()) {
2153 std::tie(FoundKernel, KernelMutex, std::ignore) =
2157 assert(FoundKernel == Kernel);
2160 std::tie(Kernel, KernelMutex, Program) =
2167 if (
nullptr == MSyclKernel || !MSyclKernel->isCreatedFromSource()) {
2172 if (KernelMutex !=
nullptr) {
2174 std::lock_guard<std::mutex> Lock(*KernelMutex);
2176 NDRDesc, RawEvents, OutEvent,
2177 EliminatedArgMask, getMemAllocationFunc);
2180 NDRDesc, RawEvents, OutEvent,
2181 EliminatedArgMask, getMemAllocationFunc);
2187 const device_impl &DeviceImpl = *(Queue->getDeviceImplPtr());
2195 cl_int ExecCGCommand::enqueueImp() {
2203 MCommandGroup->MRequirements.size() == 0)
2205 : &
MEvent->getHandleRef();
2206 switch (MCommandGroup->getType()) {
2208 case CG::CGTYPE::UpdateHost: {
2209 throw runtime_error(
"Update host should be handled by the Scheduler.",
2212 case CG::CGTYPE::CopyAccToPtr: {
2213 CGCopy *Copy = (CGCopy *)MCommandGroup.get();
2215 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2218 AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(),
MQueue,
2219 Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset,
2220 Req->MElemSize, Copy->getDst(),
2222 Req->MAccessRange, Req->MAccessRange, {0, 0, 0},
2223 Req->MElemSize, std::move(RawEvents),
MEvent->getHandleRef());
2227 case CG::CGTYPE::CopyPtrToAcc: {
2228 CGCopy *Copy = (CGCopy *)MCommandGroup.get();
2230 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2235 AllocaCmd->getSYCLMemObj(), Copy->getSrc(),
2237 Req->MAccessRange, Req->MAccessRange,
2238 {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(),
2239 MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset,
2240 Req->MElemSize, std::move(RawEvents),
MEvent->getHandleRef());
2244 case CG::CGTYPE::CopyAccToAcc: {
2245 CGCopy *Copy = (CGCopy *)MCommandGroup.get();
2249 AllocaCommandBase *AllocaCmdSrc = getAllocaForReq(ReqSrc);
2250 AllocaCommandBase *AllocaCmdDst = getAllocaForReq(ReqDst);
2253 AllocaCmdSrc->getSYCLMemObj(), AllocaCmdSrc->getMemAllocation(),
MQueue,
2254 ReqSrc->MDims, ReqSrc->MMemoryRange, ReqSrc->MAccessRange,
2255 ReqSrc->MOffset, ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(),
2256 MQueue, ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange,
2257 ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents),
2262 case CG::CGTYPE::Fill: {
2263 CGFill *Fill = (CGFill *)MCommandGroup.get();
2265 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2268 AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(),
MQueue,
2269 Fill->MPattern.size(), Fill->MPattern.data(), Req->MDims,
2270 Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize,
2271 std::move(RawEvents),
MEvent->getHandleRef());
2275 case CG::CGTYPE::RunOnHostIntel: {
2276 CGExecKernel *HostTask = (CGExecKernel *)MCommandGroup.get();
2283 std::vector<void *> ArgsBlob(HostTask->MArgs.size() + 3);
2285 std::vector<Requirement *> *CopyReqs =
2286 new std::vector<Requirement *>(HostTask->MRequirements);
2290 std::unique_ptr<HostKernelBase> *CopyHostKernel =
2291 new std::unique_ptr<HostKernelBase>(std::move(HostTask->MHostKernel));
2293 NDRDescT *CopyNDRDesc =
new NDRDescT(HostTask->MNDRDesc);
2295 ArgsBlob[0] = (
void *)CopyReqs;
2296 ArgsBlob[1] = (
void *)CopyHostKernel;
2297 ArgsBlob[2] = (
void *)CopyNDRDesc;
2299 void **NextArg = ArgsBlob.data() + 3;
2302 for (ArgDesc &Arg : HostTask->MArgs) {
2306 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2308 *NextArg = AllocaCmd->getMemAllocation();
2312 if (!RawEvents.empty()) {
2314 const detail::plugin &Plugin = EventImpls[0]->getPlugin();
2322 std::vector<pi_mem> Buffers;
2326 std::vector<void *> MemLocs;
2328 for (ArgDesc &Arg : HostTask->MArgs) {
2332 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2335 Buffers.push_back(MemArg);
2336 MemLocs.push_back(NextArg);
2339 const detail::plugin &Plugin =
MQueue->getPlugin();
2342 ArgsBlob.size() *
sizeof(ArgsBlob[0]), Buffers.size(), Buffers.data(),
2343 const_cast<const void **
>(MemLocs.data()), RawEvents.size(),
2344 RawEvents.empty() ? nullptr : RawEvents.data(), Event);
2348 throw cl::sycl::runtime_error(
2349 "Device doesn't support run_on_host_intel tasks.", Error);
2353 throw cl::sycl::runtime_error(
2354 "Enqueueing run_on_host_intel task has failed.", Error);
2357 case CG::CGTYPE::Kernel: {
2358 CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get();
2360 NDRDescT &NDRDesc = ExecKernel->MNDRDesc;
2361 std::vector<ArgDesc> &Args = ExecKernel->MArgs;
2363 if (
MQueue->is_host() || (
MQueue->getPlugin().getBackend() ==
2365 for (ArgDesc &Arg : Args)
2368 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2369 Req->MData = AllocaCmd->getMemAllocation();
2371 if (!RawEvents.empty()) {
2373 const detail::plugin &Plugin = EventImpls[0]->getPlugin();
2378 ExecKernel->MHostKernel->call(NDRDesc,
2379 getEvent()->getHostProfilingInfo());
2381 assert(
MQueue->getPlugin().getBackend() ==
2386 reinterpret_cast<pi_kernel>(ExecKernel->MHostKernel->getPtr()),
2387 NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0],
2388 &NDRDesc.LocalSize[0], 0,
nullptr,
nullptr);
2394 auto getMemAllocationFunc = [
this](
Requirement *Req) {
2395 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2396 return AllocaCmd->getMemAllocation();
2399 const std::shared_ptr<detail::kernel_impl> &SyclKernel =
2400 ExecKernel->MSyclKernel;
2401 const std::string &KernelName = ExecKernel->MKernelName;
2406 bool KernelUsesAssert = !(SyclKernel && SyclKernel->isInterop()) &&
2409 if (KernelUsesAssert) {
2410 Event = &
MEvent->getHandleRef();
2415 MQueue, NDRDesc, Args, ExecKernel->getKernelBundle(), SyclKernel,
2416 KernelName,
OSModuleHandle, RawEvents, Event, getMemAllocationFunc);
2418 case CG::CGTYPE::CopyUSM: {
2419 CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get();
2421 Copy->getDst(), std::move(RawEvents), Event);
2425 case CG::CGTYPE::FillUSM: {
2426 CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get();
2428 Fill->getFill(), std::move(RawEvents), Event);
2432 case CG::CGTYPE::PrefetchUSM: {
2433 CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get();
2435 Prefetch->getLength(), std::move(RawEvents),
2440 case CG::CGTYPE::AdviseUSM: {
2441 CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get();
2443 Advise->getAdvice(), std::move(RawEvents), Event);
2447 case CG::CGTYPE::CodeplayInteropTask: {
2448 const detail::plugin &Plugin =
MQueue->getPlugin();
2449 CGInteropTask *ExecInterop = (CGInteropTask *)MCommandGroup.get();
2453 if (!RawEvents.empty()) {
2456 std::vector<interop_handler::ReqToMem> ReqMemObjs;
2459 const auto &HandlerReq = ExecInterop->MRequirements;
2461 std::begin(HandlerReq), std::end(HandlerReq), [&](
Requirement *Req) {
2462 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2463 auto MemArg =
reinterpret_cast<pi_mem>(AllocaCmd->getMemAllocation());
2464 interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg);
2465 ReqMemObjs.emplace_back(ReqToMem);
2468 std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs));
2469 interop_handler InteropHandler(std::move(ReqMemObjs),
MQueue);
2470 ExecInterop->MInteropTask->call(InteropHandler);
2476 case CG::CGTYPE::CodeplayHostTask: {
2477 CGHostTask *HostTask =
static_cast<CGHostTask *
>(MCommandGroup.get());
2479 for (ArgDesc &Arg : HostTask->MArgs) {
2480 switch (Arg.MType) {
2483 AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
2485 Req->MData = AllocaCmd->getMemAllocation();
2493 std::vector<interop_handle::ReqToMem> ReqToMem;
2495 if (HostTask->MHostTask->isInteropTask()) {
2498 const std::vector<Requirement *> &HandlerReq = HostTask->MRequirements;
2499 auto ReqToMemConv = [&ReqToMem, HostTask](
Requirement *Req) {
2500 const std::vector<AllocaCommandBase *> &AllocaCmds =
2501 Req->MSYCLMemObj->MRecord->MAllocaCommands;
2503 for (AllocaCommandBase *AllocaCmd : AllocaCmds)
2504 if (HostTask->MQueue->getContextImplPtr() ==
2505 AllocaCmd->getQueue()->getContextImplPtr()) {
2507 reinterpret_cast<pi_mem>(AllocaCmd->getMemAllocation());
2508 ReqToMem.emplace_back(std::make_pair(Req, MemArg));
2514 "Can't get memory object due to no allocation available");
2516 throw runtime_error(
2517 "Can't get memory object due to no allocation available",
2520 std::for_each(std::begin(HandlerReq), std::end(HandlerReq), ReqToMemConv);
2521 std::sort(std::begin(ReqToMem), std::end(ReqToMem));
2531 case CG::CGTYPE::Barrier: {
2532 if (
MQueue->get_device().is_host()) {
2536 const detail::plugin &Plugin =
MQueue->getPlugin();
2538 MQueue->getHandleRef(), 0,
nullptr, Event);
2542 case CG::CGTYPE::BarrierWaitlist: {
2543 CGBarrier *Barrier =
static_cast<CGBarrier *
>(MCommandGroup.get());
2544 std::vector<detail::EventImplPtr> Events = Barrier->MEventsWaitWithBarrier;
2545 std::vector<RT::PiEvent> PiEvents =
getPiEvents(Events);
2546 if (
MQueue->get_device().is_host() || PiEvents.empty()) {
2551 const detail::plugin &Plugin =
MQueue->getPlugin();
2553 MQueue->getHandleRef(), PiEvents.size(), &PiEvents[0], Event);
2557 case CG::CGTYPE::None:
2564 return MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask;
2571 (MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask) &&
2572 (MCommandGroup->getType() != CG::CGTYPE::Kernel ||
2573 (!
static_cast<CGExecKernel *
>(MCommandGroup.get())->hasStreams() &&
2575 ->hasAuxiliaryResources()));