33 inline namespace _V1 {
46 if (DstPtrType == sycl::usm::alloc::device) {
48 if (SrcPtrType == sycl::usm::alloc::device)
50 if (SrcPtrType == sycl::usm::alloc::host ||
51 SrcPtrType == sycl::usm::alloc::unknown)
54 "Unknown copy source location");
56 if (DstPtrType == sycl::usm::alloc::host ||
57 DstPtrType == sycl::usm::alloc::unknown) {
59 if (SrcPtrType == sycl::usm::alloc::device)
61 if (SrcPtrType == sycl::usm::alloc::host ||
62 SrcPtrType == sycl::usm::alloc::unknown)
64 "Cannot copy image from host to host");
66 "Unknown copy source location");
69 "Unknown copy destination location");
/// Delegating constructor: a handler created from a single queue uses that
/// queue both as the submission queue and as the primary queue, with no
/// secondary (fallback) queue (nullptr).
/// \param Queue  the queue_impl this command group is submitted to.
/// \param IsHost whether the handler targets the host device.
handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
    : handler(Queue, Queue, nullptr, IsHost) {}
77 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
78 std::shared_ptr<detail::queue_impl> PrimaryQueue,
79 std::shared_ptr<detail::queue_impl> SecondaryQueue,
81 : MImpl(
std::make_shared<detail::handler_impl>(
std::move(PrimaryQueue),
82 std::move(SecondaryQueue))),
83 MQueue(
std::move(Queue)), MIsHost(IsHost) {}
86 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
87 : MImpl(
std::make_shared<detail::handler_impl>()), MGraph(Graph) {}
92 void handler::setStateExplicitKernelBundle() {
93 MImpl->setStateExplicitKernelBundle();
99 void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }
103 bool handler::isStateExplicitKernelBundle()
const {
104 return MImpl->isStateExplicitKernelBundle();
111 std::shared_ptr<detail::kernel_bundle_impl>
112 handler::getOrInsertHandlerKernelBundle(
bool Insert)
const {
113 if (!MImpl->MKernelBundle && Insert) {
114 auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
115 auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
117 get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
119 return MImpl->MKernelBundle;
123 void handler::setHandlerKernelBundle(
124 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
125 MImpl->MKernelBundle = NewKernelBundleImpPtr;
128 void handler::setHandlerKernelBundle(kernel Kernel) {
132 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
134 setHandlerKernelBundle(KernelBundleImpl);
137 event handler::finalize() {
148 if (MQueue && MQueue->getCommandGraph() && MSubgraphNode) {
149 return detail::createSyclObjFromImpl<event>(
150 MQueue->getCommandGraph()->getEventForNode(MSubgraphNode));
157 for (
const auto &
arg : MArgs) {
163 if (AccImpl->MIsPlaceH) {
168 "placeholder accessor must be bound by calling "
169 "handler::require() before it can be used.");
174 const auto &type = getType();
177 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
178 getOrInsertHandlerKernelBundle(
false);
179 if (KernelBundleImpPtr) {
181 if (!KernelBundleImpPtr->isInterop() &&
182 !MImpl->isStateExplicitKernelBundle()) {
183 auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
186 bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
189 if (!KernelInserted &&
192 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
194 kernel_bundle<bundle_state::executable> ExecKernelBundle =
197 setHandlerKernelBundle(KernelBundleImpPtr);
198 KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
203 "Failed to add kernel to kernel bundle.");
206 switch (KernelBundleImpPtr->get_bundle_state()) {
209 kernel_bundle<bundle_state::executable> ExecBundle =
build(
211 KernelBundleImpPtr));
213 setHandlerKernelBundle(KernelBundleImpPtr);
221 assert(0 &&
"Expected that the bundle is either in input or executable "
227 if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() &&
228 !MQueue->is_in_fusion_mode() &&
230 MStreamStorage.size() ==
237 std::vector<sycl::detail::pi::PiEvent> RawEvents;
240 #ifdef XPTI_ENABLE_INSTRUMENTATION
243 auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
244 StreamID, MKernel, MCodeLoc, MKernelName, MQueue, MNDRDesc,
245 KernelBundleImpPtr, MArgs);
246 auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
247 InstanceID = InstanceID]() {
249 auto EnqueueKernel = [&]() {
252 pi_int32 Result = PI_ERROR_INVALID_VALUE;
253 #ifdef XPTI_ENABLE_INSTRUMENTATION
254 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
255 xpti::trace_task_begin,
nullptr);
257 if (MQueue->is_host()) {
258 MHostKernel->call(MNDRDesc, (NewEvent)
259 ? NewEvent->getHostProfilingInfo()
263 if (MQueue->getDeviceImplPtr()->getBackend() ==
264 backend::ext_intel_esimd_emulator) {
266 if (NewEvent !=
nullptr)
267 NewEvent->setHostEnqueueTime();
269 nullptr,
reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
277 MKernel, MKernelName, RawEvents, NewEvent,
278 nullptr, MImpl->MKernelCacheConfig);
281 #ifdef XPTI_ENABLE_INSTRUMENTATION
282 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
283 xpti::trace_task_end,
nullptr);
288 bool DiscardEvent =
false;
289 if (MQueue->has_discard_events_support()) {
291 bool KernelUsesAssert =
292 !(MKernel && MKernel->isInterop()) &&
294 DiscardEvent = !KernelUsesAssert;
298 if (PI_SUCCESS != EnqueueKernel())
299 throw runtime_error(
"Enqueue process failed.",
300 PI_ERROR_INVALID_OPERATION);
302 NewEvent = std::make_shared<detail::event_impl>(MQueue);
303 NewEvent->setContextImpl(MQueue->getContextImplPtr());
304 NewEvent->setStateIncomplete();
305 NewEvent->setSubmissionTime();
307 if (PI_SUCCESS != EnqueueKernel())
308 throw runtime_error(
"Enqueue process failed.",
309 PI_ERROR_INVALID_OPERATION);
310 else if (NewEvent->is_host() || NewEvent->getHandleRef() ==
nullptr)
311 NewEvent->setComplete();
313 MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
319 std::unique_ptr<detail::CG> CommandGroup;
325 CommandGroup.reset(
new detail::CGExecKernel(
326 std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
327 std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs),
328 MKernelName, std::move(MStreamStorage),
329 std::move(MImpl->MAuxiliaryResources), MCGType,
330 MImpl->MKernelCacheConfig, MCodeLoc));
337 new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
338 std::move(MImpl->MAuxiliaryResources), MCodeLoc));
341 CommandGroup.reset(
new detail::CGFill(std::move(MPattern), MDstPtr,
342 std::move(CGData), MCodeLoc));
346 new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc));
349 CommandGroup.reset(
new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength,
350 std::move(CGData), MCodeLoc));
353 CommandGroup.reset(
new detail::CGFillUSM(
354 std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc));
357 CommandGroup.reset(
new detail::CGPrefetchUSM(MDstPtr, MLength,
358 std::move(CGData), MCodeLoc));
361 CommandGroup.reset(
new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice,
362 std::move(CGData), MCGType,
366 CommandGroup.reset(
new detail::CGCopy2DUSM(
367 MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
368 MImpl->MHeight, std::move(CGData), MCodeLoc));
371 CommandGroup.reset(
new detail::CGFill2DUSM(
372 std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
373 MImpl->MHeight, std::move(CGData), MCodeLoc));
376 CommandGroup.reset(
new detail::CGMemset2DUSM(
377 MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
378 std::move(CGData), MCodeLoc));
381 CommandGroup.reset(
new detail::CGHostTask(
382 std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
383 std::move(MArgs), std::move(CGData), MCGType, MCodeLoc));
387 if (
auto GraphImpl = getCommandGraph(); GraphImpl !=
nullptr) {
390 if (MEventsWaitWithBarrier.size() == 0) {
391 MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
395 std::vector<detail::EventImplPtr> EventsBarriers =
396 GraphImpl->removeBarriersFromExtraDependencies();
397 MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
398 std::begin(EventsBarriers),
399 std::end(EventsBarriers));
402 std::begin(MEventsWaitWithBarrier),
403 std::end(MEventsWaitWithBarrier));
411 new detail::CGBarrier(std::move(MEventsWaitWithBarrier),
412 std::move(CGData), MCGType, MCodeLoc));
417 CommandGroup.reset(
new detail::CGCopyToDeviceGlobal(
418 MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
419 std::move(CGData), MCodeLoc));
423 CommandGroup.reset(
new detail::CGCopyFromDeviceGlobal(
424 MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
425 std::move(CGData), MCodeLoc));
429 CommandGroup.reset(
new detail::CGReadWriteHostPipe(
430 MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr,
431 MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData),
438 if (!MSubgraphNode) {
439 event GraphCompletionEvent =
440 MExecGraph->enqueue(MQueue, std::move(CGData));
441 MLastEvent = GraphCompletionEvent;
446 CommandGroup.reset(
new detail::CGCopyImage(
447 MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat,
448 MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset,
449 MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc));
452 CommandGroup.reset(
new detail::CGSemaphoreWait(
453 MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
456 CommandGroup.reset(
new detail::CGSemaphoreSignal(
457 MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
461 std::cout <<
"WARNING: An empty command group is submitted." << std::endl;
467 if (MGraph || (MQueue && MQueue->getCommandGraph())) {
472 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
478 if (!MSubgraphNode && !CommandGroup)
479 throw sycl::runtime_error(
480 "Internal Error. Command group cannot be constructed.",
481 PI_ERROR_INVALID_OPERATION);
487 MGraphNodeCG = std::move(CommandGroup);
488 return detail::createSyclObjFromImpl<event>(
489 std::make_shared<detail::event_impl>());
494 if (
auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
495 auto EventImpl = std::make_shared<detail::event_impl>();
496 std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
505 if (MQueue->isInOrder()) {
509 auto DependentNode = GraphImpl->getLastInorderNode(MQueue);
511 NodeImpl = DependentNode
512 ? GraphImpl->add(MCGType, std::move(CommandGroup),
514 : GraphImpl->add(MCGType, std::move(CommandGroup));
519 GraphImpl->setLastInorderNode(MQueue, NodeImpl);
521 NodeImpl = GraphImpl->add(MCGType, std::move(CommandGroup));
525 GraphImpl->addEventForNode(EventImpl, NodeImpl);
527 EventImpl->setCommandGraph(GraphImpl);
529 return detail::createSyclObjFromImpl<event>(EventImpl);
533 std::move(CommandGroup), std::move(MQueue));
535 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
539 void handler::addReduction(
const std::shared_ptr<const void> &ReduObj) {
540 MImpl->MAuxiliaryResources.push_back(ReduObj);
545 if (getCommandGraph() &&
546 static_cast<detail::SYCLMemObjT *
>(AccImpl->MSYCLMemObj)
547 ->needsWriteBack()) {
549 "Accessors to buffers which have write_back enabled "
550 "are not allowed to be used in command graphs.");
553 if (Req->MAccessMode != sycl::access_mode::read) {
554 auto SYCLMemObj =
static_cast<detail::SYCLMemObjT *
>(Req->MSYCLMemObj);
555 SYCLMemObj->handleWriteAccessorCreation();
558 if (Req->MAccessRange.size() != 0)
568 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
571 static_cast<int>(AccTarget));
574 void handler::associateWithHandler(
575 detail::UnsampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
577 static_cast<int>(AccTarget));
580 void handler::associateWithHandler(
581 detail::SampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
583 static_cast<int>(AccTarget));
587 size_t &IndexShift,
int Size,
588 bool IsKernelCreatedFromSource,
590 std::vector<detail::ArgDesc> &Args,
594 AccImpl->
resize(GlobalSize);
596 Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
602 if (!isESIMD && !IsKernelCreatedFromSource) {
605 const size_t SizeAccField =
606 sizeof(size_t) * (AccImpl->
MDims == 0 ? 1 : AccImpl->
MDims);
608 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
612 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
616 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
617 &AccImpl->
MOffset[0], SizeAccField, Index + IndexShift);
622 const int Size,
const size_t Index,
size_t &IndexShift,
623 bool IsKernelCreatedFromSource,
bool IsESIMD) {
627 case kernel_param_kind_t::kind_std_layout:
628 case kernel_param_kind_t::kind_pointer: {
629 MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
632 case kernel_param_kind_t::kind_stream: {
636 detail::AccessorBaseHost *GBufBase =
637 static_cast<detail::AccessorBaseHost *
>(&S->GlobalBuf);
641 IsKernelCreatedFromSource,
644 detail::AccessorBaseHost *GOffsetBase =
645 static_cast<detail::AccessorBaseHost *
>(&S->GlobalOffset);
649 IsKernelCreatedFromSource,
652 detail::AccessorBaseHost *GFlushBase =
653 static_cast<detail::AccessorBaseHost *
>(&S->GlobalFlushBuf);
657 size_t GlobalSize = MNDRDesc.
GlobalSize.size();
663 if (GlobalSize == 0) {
668 IsKernelCreatedFromSource, GlobalSize, MArgs,
671 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
672 &S->FlushBufferSize,
sizeof(S->FlushBufferSize),
677 case kernel_param_kind_t::kind_accessor: {
684 case access::target::constant_buffer: {
687 IsKernelCreatedFromSource,
691 case access::target::local: {
692 detail::LocalAccessorImplHost *LAcc =
693 static_cast<detail::LocalAccessorImplHost *
>(Ptr);
695 range<3> &Size = LAcc->MSize;
696 const int Dims = LAcc->MDims;
697 int SizeInBytes = LAcc->MElemSize;
698 for (
int I = 0; I < Dims; ++I)
699 SizeInBytes *= Size[I];
702 SizeInBytes = std::max(SizeInBytes, 1);
703 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
nullptr,
704 SizeInBytes, Index + IndexShift);
708 if (!IsESIMD && !IsKernelCreatedFromSource) {
710 const size_t SizeAccField = Dims *
sizeof(Size[0]);
711 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
712 SizeAccField, Index + IndexShift);
714 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
715 SizeAccField, Index + IndexShift);
717 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
718 SizeAccField, Index + IndexShift);
725 MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
726 if (!IsKernelCreatedFromSource) {
734 case access::target::host_buffer: {
735 throw sycl::invalid_parameter_error(
"Unsupported accessor target case.",
736 PI_ERROR_INVALID_OPERATION);
742 case kernel_param_kind_t::kind_sampler: {
743 MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr,
sizeof(sampler),
747 case kernel_param_kind_t::kind_specialization_constants_buffer: {
749 kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
753 case kernel_param_kind_t::kind_invalid:
754 throw runtime_error(
"Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
769 void handler::extractArgsAndReqs() {
770 assert(MKernel &&
"MKernel is not initialized");
771 std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
775 UnPreparedArgs.begin(), UnPreparedArgs.end(),
776 [](
const detail::ArgDesc &first,
const detail::ArgDesc &second) ->
bool {
777 return (first.MIndex < second.MIndex);
780 const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
783 size_t IndexShift = 0;
784 for (
size_t I = 0; I < UnPreparedArgs.size(); ++I) {
785 void *Ptr = UnPreparedArgs[I].MPtr;
787 const int &Size = UnPreparedArgs[I].MSize;
788 const int Index = UnPreparedArgs[I].MIndex;
789 processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
794 void handler::extractArgsAndReqsFromLambda(
795 char *LambdaPtr,
size_t KernelArgsNum,
796 const detail::kernel_param_desc_t *KernelArgs,
bool IsESIMD) {
797 const bool IsKernelCreatedFromSource =
false;
798 size_t IndexShift = 0;
801 for (
size_t I = 0; I < KernelArgsNum; ++I) {
802 void *Ptr = LambdaPtr + KernelArgs[I].offset;
804 const int &Size = KernelArgs[I].info;
811 AccTarget == access::target::constant_buffer) ||
814 detail::AccessorBaseHost *AccBase =
815 static_cast<detail::AccessorBaseHost *
>(Ptr);
817 }
else if (AccTarget == access::target::local) {
818 detail::LocalAccessorBaseHost *LocalAccBase =
819 static_cast<detail::LocalAccessorBaseHost *
>(Ptr);
823 processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
831 std::string handler::getKernelName() {
832 return MKernel->get_info<info::kernel::function_name>();
835 void handler::verifyUsedKernelBundle(
const std::string &KernelName) {
836 auto UsedKernelBundleImplPtr =
837 getOrInsertHandlerKernelBundle(
false);
838 if (!UsedKernelBundleImplPtr)
842 if (!MImpl->isStateExplicitKernelBundle())
848 if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
851 "The kernel bundle in use does not contain the kernel");
855 throwIfActionIsCreated();
857 MEventsWaitWithBarrier.resize(WaitList.size());
859 WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
860 [](
const event &Event) { return detail::getSyclObjImpl(Event); });
863 using namespace sycl::detail;
864 bool handler::DisableRangeRounding() {
868 bool handler::RangeRoundingTrace() {
872 void handler::GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
874 SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
875 MinFactor, GoodFactor, MinRange);
879 throwIfActionIsCreated();
880 MSrcPtr =
const_cast<void *
>(Src);
887 throwIfActionIsCreated();
889 MPattern.push_back(
static_cast<char>(Value));
895 throwIfActionIsCreated();
896 MDstPtr =
const_cast<void *
>(Ptr);
902 throwIfActionIsCreated();
903 MDstPtr =
const_cast<void *
>(Ptr);
909 void handler::ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
910 const void *Src,
size_t SrcPitch,
911 size_t Width,
size_t Height) {
913 MSrcPtr =
const_cast<void *
>(Src);
915 MImpl->MSrcPitch = SrcPitch;
916 MImpl->MDstPitch = DestPitch;
917 MImpl->MWidth = Width;
918 MImpl->MHeight = Height;
922 void handler::ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
923 const void *Value,
size_t ValueSize,
924 size_t Width,
size_t Height) {
927 MPattern.resize(ValueSize);
929 MImpl->MDstPitch = DestPitch;
930 MImpl->MWidth = Width;
931 MImpl->MHeight = Height;
935 void handler::ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
936 size_t Width,
size_t Height) {
939 MPattern.push_back(
static_cast<char>(Value));
940 MImpl->MDstPitch = DestPitch;
941 MImpl->MWidth = Width;
942 MImpl->MHeight = Height;
949 throwIfGraphAssociated<
950 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
951 sycl_ext_oneapi_bindless_images>();
969 MImpl->MSrcOffset = {0, 0, 0};
970 MImpl->MDestOffset = {0, 0, 0};
973 MImpl->MImageDesc = PiDesc;
974 MImpl->MImageFormat = PiFormat;
975 MImpl->MImageCopyFlags =
985 throwIfGraphAssociated<
986 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
987 sycl_ext_oneapi_bindless_images>();
1006 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1007 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1008 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1009 MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]};
1010 MImpl->MImageDesc = PiDesc;
1011 MImpl->MImageFormat = PiFormat;
1012 MImpl->MImageCopyFlags =
1020 throwIfGraphAssociated<
1021 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1022 sycl_ext_oneapi_bindless_images>();
1040 MImpl->MSrcOffset = {0, 0, 0};
1041 MImpl->MDestOffset = {0, 0, 0};
1044 MImpl->MImageDesc = PiDesc;
1045 MImpl->MImageFormat = PiFormat;
1046 MImpl->MImageCopyFlags =
1056 throwIfGraphAssociated<
1057 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1058 sycl_ext_oneapi_bindless_images>();
1067 SrcImgDesc.
depth > 0
1077 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1078 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1079 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1080 MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]};
1081 MImpl->MImageDesc = PiDesc;
1082 MImpl->MImageFormat = PiFormat;
1083 MImpl->MImageCopyFlags =
1089 void *Src,
void *Dest,
1091 throwIfGraphAssociated<
1092 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1093 sycl_ext_oneapi_bindless_images>();
1111 MImpl->MSrcOffset = {0, 0, 0};
1112 MImpl->MDestOffset = {0, 0, 0};
1115 MImpl->MImageDesc = PiDesc;
1117 MImpl->MImageFormat = PiFormat;
1129 throwIfGraphAssociated<
1130 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1131 sycl_ext_oneapi_bindless_images>();
1150 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1151 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1152 MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]};
1153 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1154 MImpl->MImageDesc = PiDesc;
1156 MImpl->MImageFormat = PiFormat;
1165 throwIfGraphAssociated<
1166 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1167 sycl_ext_oneapi_bindless_images>();
1168 MImpl->MInteropSemaphoreHandle =
1175 throwIfGraphAssociated<
1176 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1177 sycl_ext_oneapi_bindless_images>();
1178 MImpl->MInteropSemaphoreHandle =
1185 std::shared_ptr<detail::queue_impl> PrimaryQueue =
1186 MImpl->MSubmissionPrimaryQueue;
1187 if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.
get_context())) ||
1188 (MGraph && (MGraph->getContext() != ExecBundle.
get_context())))
1191 "Context associated with the primary queue is different from the "
1192 "context associated with the kernel bundle");
1194 std::shared_ptr<detail::queue_impl> SecondaryQueue =
1195 MImpl->MSubmissionSecondaryQueue;
1196 if (SecondaryQueue &&
1197 SecondaryQueue->get_context() != ExecBundle.
get_context())
1200 "Context associated with the secondary queue is different from the "
1201 "context associated with the kernel bundle");
1203 setStateExplicitKernelBundle();
1209 if (EventImpl->isDiscarded()) {
1211 "Queue operation cannot depend on discarded event.");
1213 if (
auto Graph = getCommandGraph(); Graph) {
1214 auto EventGraph = EventImpl->getCommandGraph();
1215 if (EventGraph ==
nullptr) {
1218 "Graph nodes cannot depend on events from outside the graph.");
1220 if (EventGraph != Graph) {
1223 "Graph nodes cannot depend on events from another graph.");
1226 CGData.MEvents.push_back(EventImpl);
1230 for (
const event &Event : Events) {
1238 auto &Plugin = ContextImpl->getPlugin();
1242 &SupportsOp,
nullptr);
1246 bool handler::supportsUSMMemcpy2D() {
1247 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1248 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1257 bool handler::supportsUSMFill2D() {
1258 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1259 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1268 bool handler::supportsUSMMemset2D() {
1269 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1270 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1279 id<2> handler::computeFallbackKernelBounds(
size_t Width,
size_t Height) {
1280 device Dev = MQueue->get_device();
1281 range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
1282 Dev.get_info<info::device::max_compute_units>();
1283 return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
1286 void handler::ext_intel_read_host_pipe(
const std::string &Name,
void *Ptr,
1287 size_t Size,
bool Block) {
1288 MImpl->HostPipeName = Name;
1289 MImpl->HostPipePtr = Ptr;
1290 MImpl->HostPipeTypeSize = Size;
1291 MImpl->HostPipeBlocking = Block;
1292 MImpl->HostPipeRead = 1;
1296 void handler::ext_intel_write_host_pipe(
const std::string &Name,
void *Ptr,
1297 size_t Size,
bool Block) {
1298 MImpl->HostPipeName = Name;
1299 MImpl->HostPipePtr = Ptr;
1300 MImpl->HostPipeTypeSize = Size;
1301 MImpl->HostPipeBlocking = Block;
1302 MImpl->HostPipeRead = 0;
1306 void handler::memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
1307 bool IsDeviceImageScoped,
size_t NumBytes,
1309 throwIfActionIsCreated();
1310 MSrcPtr =
const_cast<void *
>(Src);
1311 MDstPtr =
const_cast<void *
>(DeviceGlobalPtr);
1312 MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1314 MImpl->MOffset = Offset;
1318 void handler::memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
1319 bool IsDeviceImageScoped,
size_t NumBytes,
1321 throwIfActionIsCreated();
1322 MSrcPtr =
const_cast<void *
>(DeviceGlobalPtr);
1324 MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1326 MImpl->MOffset = Offset;
1330 void handler::memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
1332 size_t DeviceGlobalTSize,
1333 bool IsDeviceImageScoped,
1334 size_t NumBytes,
size_t Offset) {
1335 std::weak_ptr<detail::context_impl> WeakContextImpl =
1336 MQueue->getContextImplPtr();
1337 std::weak_ptr<detail::device_impl> WeakDeviceImpl =
1338 MQueue->getDeviceImplPtr();
1343 std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
1344 std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
1345 if (ContextImpl && DeviceImpl)
1346 ContextImpl->memcpyToHostOnlyDeviceGlobal(
1347 DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
1348 IsDeviceImageScoped, NumBytes, Offset);
1352 void handler::memcpyFromHostOnlyDeviceGlobal(
void *Dest,
1353 const void *DeviceGlobalPtr,
1354 bool IsDeviceImageScoped,
1355 size_t NumBytes,
size_t Offset) {
1356 const std::shared_ptr<detail::context_impl> &ContextImpl =
1357 MQueue->getContextImplPtr();
1358 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1359 MQueue->getDeviceImplPtr();
1364 ContextImpl->memcpyFromHostOnlyDeviceGlobal(
1365 DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
1370 const std::shared_ptr<detail::context_impl> &
1371 handler::getContextImplPtr()
const {
1372 return MQueue->getContextImplPtr();
1375 void handler::setKernelCacheConfig(
1377 MImpl->MKernelCacheConfig = Config;
1390 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph;
1392 ParentGraph = MQueue->getCommandGraph();
1394 ParentGraph = MGraph;
1406 ParentGraph->MMutex);
1412 MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl);
1416 if (MQueue && MQueue->isInOrder()) {
1417 ParentGraph->setLastInorderNode(MQueue, MSubgraphNode);
1420 auto SubgraphEvent = std::make_shared<event_impl>();
1421 SubgraphEvent->setCommandGraph(ParentGraph);
1422 ParentGraph->addEventForNode(SubgraphEvent, MSubgraphNode);
1425 MExecGraph = GraphImpl;
1429 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1430 handler::getCommandGraph()
const {
1434 return MQueue->getCommandGraph();
1437 std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
1439 std::array<size_t, 3>
PiResult = {};
1441 Dev->getHandleRef(),
1443 ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1445 if (Ret == PI_SUCCESS) {