35 inline namespace _V1 {
48 if (DstPtrType == sycl::usm::alloc::device) {
50 if (SrcPtrType == sycl::usm::alloc::device)
52 if (SrcPtrType == sycl::usm::alloc::host ||
53 SrcPtrType == sycl::usm::alloc::unknown)
56 "Unknown copy source location");
58 if (DstPtrType == sycl::usm::alloc::host ||
59 DstPtrType == sycl::usm::alloc::unknown) {
61 if (SrcPtrType == sycl::usm::alloc::device)
63 if (SrcPtrType == sycl::usm::alloc::host ||
64 SrcPtrType == sycl::usm::alloc::unknown)
66 "Cannot copy image from host to host");
68 "Unknown copy source location");
71 "Unknown copy destination location");
82 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
bool IsHost)
83 :
handler(Queue, Queue, nullptr, IsHost) {}
85 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
86 std::shared_ptr<detail::queue_impl> PrimaryQueue,
87 std::shared_ptr<detail::queue_impl> SecondaryQueue,
89 : MImpl(
std::make_shared<detail::handler_impl>(
std::move(PrimaryQueue),
90 std::move(SecondaryQueue))),
91 MQueue(
std::move(Queue)), MIsHost(IsHost) {}
94 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
95 : MImpl(
std::make_shared<detail::handler_impl>()), MGraph(Graph) {}
100 void handler::setStateExplicitKernelBundle() {
101 MImpl->setStateExplicitKernelBundle();
107 void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }
111 bool handler::isStateExplicitKernelBundle()
const {
112 return MImpl->isStateExplicitKernelBundle();
119 std::shared_ptr<detail::kernel_bundle_impl>
120 handler::getOrInsertHandlerKernelBundle(
bool Insert)
const {
121 if (!MImpl->MKernelBundle && Insert) {
122 auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
123 auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
125 get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
127 return MImpl->MKernelBundle;
131 void handler::setHandlerKernelBundle(
132 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
133 MImpl->MKernelBundle = NewKernelBundleImpPtr;
136 void handler::setHandlerKernelBundle(kernel Kernel) {
140 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
142 setHandlerKernelBundle(KernelBundleImpl);
145 event handler::finalize() {
156 for (
const auto &
arg : MArgs) {
162 if (AccImpl->MIsPlaceH) {
167 "placeholder accessor must be bound by calling "
168 "handler::require() before it can be used.");
171 bool AccFound =
false;
172 for (detail::ArgDesc &Acc : MAssociatedAccesors) {
182 "placeholder accessor must be bound by calling "
183 "handler::require() before it can be used.");
189 const auto &type = getType();
192 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
193 getOrInsertHandlerKernelBundle(
false);
194 if (KernelBundleImpPtr) {
196 if (!KernelBundleImpPtr->isInterop() &&
197 !MImpl->isStateExplicitKernelBundle()) {
198 auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
201 MKernelName.
c_str());
202 bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
205 if (!KernelInserted &&
208 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
210 kernel_bundle<bundle_state::executable> ExecKernelBundle =
213 setHandlerKernelBundle(KernelBundleImpPtr);
214 KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
219 "Failed to add kernel to kernel bundle.");
222 switch (KernelBundleImpPtr->get_bundle_state()) {
225 kernel_bundle<bundle_state::executable> ExecBundle =
build(
227 KernelBundleImpPtr));
229 setHandlerKernelBundle(KernelBundleImpPtr);
237 assert(0 &&
"Expected that the bundle is either in input or executable "
243 if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() &&
244 !MQueue->is_in_fusion_mode() && !CGData.
MRequirements.size() &&
245 !MStreamStorage.size() &&
247 (MQueue->isInOrder() &&
249 CGData.
MEvents, MQueue->getContextImplPtr())))) {
255 std::vector<sycl::detail::pi::PiEvent> RawEvents;
258 #ifdef XPTI_ENABLE_INSTRUMENTATION
261 auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
262 StreamID, MKernel, MCodeLoc, MKernelName.
c_str(), MQueue, MNDRDesc,
263 KernelBundleImpPtr, MArgs);
264 auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
265 InstanceID = InstanceID]() {
267 auto EnqueueKernel = [&]() {
270 pi_int32 Result = PI_ERROR_INVALID_VALUE;
271 #ifdef XPTI_ENABLE_INSTRUMENTATION
272 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
273 xpti::trace_task_begin,
nullptr);
275 if (MQueue->is_host()) {
276 MHostKernel->call(MNDRDesc, (NewEvent)
277 ? NewEvent->getHostProfilingInfo()
281 if (MQueue->getDeviceImplPtr()->getBackend() ==
282 backend::ext_intel_esimd_emulator) {
284 if (NewEvent !=
nullptr)
285 NewEvent->setHostEnqueueTime();
287 if (MImpl->MKernelIsCooperative) {
299 reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
311 MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel,
312 MKernelName.
c_str(), RawEvents, NewEvent,
nullptr,
313 MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative);
316 #ifdef XPTI_ENABLE_INSTRUMENTATION
318 if (NewEvent !=
nullptr) {
319 detail::emitInstrumentationGeneral(
320 StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
321 static_cast<const void *
>(NewEvent->getHandleRef()));
323 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
324 xpti::trace_task_end,
nullptr);
329 bool DiscardEvent =
false;
330 if (MQueue->supportsDiscardingPiEvents()) {
332 bool KernelUsesAssert =
333 !(MKernel && MKernel->isInterop()) &&
335 MKernelName.
c_str());
336 DiscardEvent = !KernelUsesAssert;
340 if (PI_SUCCESS != EnqueueKernel())
341 throw runtime_error(
"Enqueue process failed.",
342 PI_ERROR_INVALID_OPERATION);
344 NewEvent = std::make_shared<detail::event_impl>(MQueue);
345 NewEvent->setWorkerQueue(MQueue);
346 NewEvent->setContextImpl(MQueue->getContextImplPtr());
347 NewEvent->setStateIncomplete();
348 NewEvent->setSubmissionTime();
350 if (PI_SUCCESS != EnqueueKernel())
351 throw runtime_error(
"Enqueue process failed.",
352 PI_ERROR_INVALID_OPERATION);
353 else if (NewEvent->is_host() || NewEvent->getHandleRef() ==
nullptr)
354 NewEvent->setComplete();
356 MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
362 std::unique_ptr<detail::CG> CommandGroup;
368 CommandGroup.reset(
new detail::CGExecKernel(
369 std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
370 std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs),
371 MKernelName.
c_str(), std::move(MStreamStorage),
372 std::move(MImpl->MAuxiliaryResources), MCGType,
373 MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, MCodeLoc));
380 new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
381 std::move(MImpl->MAuxiliaryResources), MCodeLoc));
384 CommandGroup.reset(
new detail::CGFill(std::move(MPattern), MDstPtr,
385 std::move(CGData), MCodeLoc));
389 new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc));
392 CommandGroup.reset(
new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength,
393 std::move(CGData), MCodeLoc));
396 CommandGroup.reset(
new detail::CGFillUSM(
397 std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc));
400 CommandGroup.reset(
new detail::CGPrefetchUSM(MDstPtr, MLength,
401 std::move(CGData), MCodeLoc));
404 CommandGroup.reset(
new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice,
405 std::move(CGData), MCGType,
409 CommandGroup.reset(
new detail::CGCopy2DUSM(
410 MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
411 MImpl->MHeight, std::move(CGData), MCodeLoc));
414 CommandGroup.reset(
new detail::CGFill2DUSM(
415 std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
416 MImpl->MHeight, std::move(CGData), MCodeLoc));
419 CommandGroup.reset(
new detail::CGMemset2DUSM(
420 MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
421 std::move(CGData), MCodeLoc));
425 : MQueue->getContextImplPtr();
426 CommandGroup.reset(
new detail::CGHostTask(
427 std::move(MHostTask), MQueue, context, std::move(MArgs),
428 std::move(CGData), MCGType, MCodeLoc));
433 if (
auto GraphImpl = getCommandGraph(); GraphImpl !=
nullptr) {
436 if (MEventsWaitWithBarrier.size() == 0) {
437 MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
441 std::vector<detail::EventImplPtr> EventsBarriers =
442 GraphImpl->removeBarriersFromExtraDependencies();
443 MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
444 std::begin(EventsBarriers),
445 std::end(EventsBarriers));
448 std::begin(MEventsWaitWithBarrier),
449 std::end(MEventsWaitWithBarrier));
457 new detail::CGBarrier(std::move(MEventsWaitWithBarrier),
458 std::move(CGData), MCGType, MCodeLoc));
463 CommandGroup.reset(
new detail::CGCopyToDeviceGlobal(
464 MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
465 std::move(CGData), MCodeLoc));
469 CommandGroup.reset(
new detail::CGCopyFromDeviceGlobal(
470 MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
471 std::move(CGData), MCodeLoc));
475 CommandGroup.reset(
new detail::CGReadWriteHostPipe(
476 MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr,
477 MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData),
482 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph =
483 MQueue ? MQueue->getCommandGraph() : MGraph;
491 ParentGraph->MMutex);
497 CommandGroup.reset(
new sycl::detail::CGExecCommandBuffer(
498 nullptr, MExecGraph, std::move(CGData)));
501 event GraphCompletionEvent =
502 MExecGraph->enqueue(MQueue, std::move(CGData));
503 MLastEvent = GraphCompletionEvent;
508 CommandGroup.reset(
new detail::CGCopyImage(
509 MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat,
510 MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset,
511 MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc));
514 CommandGroup.reset(
new detail::CGSemaphoreWait(
515 MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
518 CommandGroup.reset(
new detail::CGSemaphoreSignal(
519 MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
523 std::cout <<
"WARNING: An empty command group is submitted." << std::endl;
529 if (MGraph || (MQueue && MQueue->getCommandGraph())) {
534 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
541 throw sycl::runtime_error(
542 "Internal Error. Command group cannot be constructed.",
543 PI_ERROR_INVALID_OPERATION);
549 MGraphNodeCG = std::move(CommandGroup);
550 return detail::createSyclObjFromImpl<event>(
551 std::make_shared<detail::event_impl>());
556 if (
auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
557 auto EventImpl = std::make_shared<detail::event_impl>();
558 std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
567 MImpl->MUserFacingNodeType !=
569 ? MImpl->MUserFacingNodeType
573 if (MQueue->isInOrder()) {
577 auto DependentNode = GraphImpl->getLastInorderNode(MQueue);
579 NodeImpl = DependentNode
580 ? GraphImpl->add(NodeType, std::move(CommandGroup),
582 : GraphImpl->add(NodeType, std::move(CommandGroup));
587 GraphImpl->setLastInorderNode(MQueue, NodeImpl);
589 NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
593 GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl);
595 NodeImpl->MNDRangeUsed = MImpl->MNDRangeUsed;
597 return detail::createSyclObjFromImpl<event>(EventImpl);
601 std::move(CommandGroup), std::move(MQueue));
603 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
607 void handler::addReduction(
const std::shared_ptr<const void> &ReduObj) {
608 MImpl->MAuxiliaryResources.push_back(ReduObj);
613 if (getCommandGraph() &&
614 static_cast<detail::SYCLMemObjT *
>(AccImpl->MSYCLMemObj)
615 ->needsWriteBack()) {
617 "Accessors to buffers which have write_back enabled "
618 "are not allowed to be used in command graphs.");
621 if (Req->MAccessMode != sycl::access_mode::read) {
622 auto SYCLMemObj =
static_cast<detail::SYCLMemObjT *
>(Req->MSYCLMemObj);
623 SYCLMemObj->handleWriteAccessorCreation();
626 if (Req->MAccessRange.size() != 0)
636 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
639 static_cast<int>(AccTarget));
642 void handler::associateWithHandler(
643 detail::UnsampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
645 static_cast<int>(AccTarget));
648 void handler::associateWithHandler(
649 detail::SampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
651 static_cast<int>(AccTarget));
655 size_t &IndexShift,
int Size,
656 bool IsKernelCreatedFromSource,
658 std::vector<detail::ArgDesc> &Args,
662 AccImpl->resize(GlobalSize);
664 Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
670 if (!isESIMD && !IsKernelCreatedFromSource) {
673 const size_t SizeAccField =
674 sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
676 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
677 &AccImpl->MAccessRange[0], SizeAccField,
680 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
681 &AccImpl->MMemoryRange[0], SizeAccField,
684 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
685 &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
690 const int Size,
const size_t Index,
size_t &IndexShift,
691 bool IsKernelCreatedFromSource,
bool IsESIMD) {
695 case kernel_param_kind_t::kind_std_layout:
696 case kernel_param_kind_t::kind_pointer: {
697 MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
700 case kernel_param_kind_t::kind_stream: {
704 detail::AccessorBaseHost *GBufBase =
705 static_cast<detail::AccessorBaseHost *
>(&S->GlobalBuf);
709 IsKernelCreatedFromSource,
712 detail::AccessorBaseHost *GOffsetBase =
713 static_cast<detail::AccessorBaseHost *
>(&S->GlobalOffset);
717 IsKernelCreatedFromSource,
720 detail::AccessorBaseHost *GFlushBase =
721 static_cast<detail::AccessorBaseHost *
>(&S->GlobalFlushBuf);
725 size_t GlobalSize = MNDRDesc.
GlobalSize.size();
731 if (GlobalSize == 0) {
736 IsKernelCreatedFromSource, GlobalSize, MArgs,
739 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
740 &S->FlushBufferSize,
sizeof(S->FlushBufferSize),
745 case kernel_param_kind_t::kind_accessor: {
752 case access::target::constant_buffer: {
755 IsKernelCreatedFromSource,
759 case access::target::local: {
760 detail::LocalAccessorImplHost *LAcc =
761 static_cast<detail::LocalAccessorImplHost *
>(Ptr);
763 range<3> &Size = LAcc->MSize;
764 const int Dims = LAcc->MDims;
765 int SizeInBytes = LAcc->MElemSize;
766 for (
int I = 0; I < Dims; ++I)
767 SizeInBytes *= Size[I];
770 SizeInBytes = std::max(SizeInBytes, 1);
771 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
nullptr,
772 SizeInBytes, Index + IndexShift);
776 if (!IsESIMD && !IsKernelCreatedFromSource) {
778 const size_t SizeAccField = (Dims == 0 ? 1 : Dims) *
sizeof(Size[0]);
779 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
780 SizeAccField, Index + IndexShift);
782 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
783 SizeAccField, Index + IndexShift);
785 MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
786 SizeAccField, Index + IndexShift);
793 MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
794 if (!IsKernelCreatedFromSource) {
802 case access::target::host_buffer: {
803 throw sycl::invalid_parameter_error(
"Unsupported accessor target case.",
804 PI_ERROR_INVALID_OPERATION);
810 case kernel_param_kind_t::kind_sampler: {
811 MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr,
sizeof(sampler),
815 case kernel_param_kind_t::kind_specialization_constants_buffer: {
817 kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
821 case kernel_param_kind_t::kind_invalid:
822 throw runtime_error(
"Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
837 void handler::extractArgsAndReqs() {
838 assert(MKernel &&
"MKernel is not initialized");
839 std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
843 UnPreparedArgs.begin(), UnPreparedArgs.end(),
844 [](
const detail::ArgDesc &first,
const detail::ArgDesc &second) ->
bool {
845 return (first.MIndex < second.MIndex);
848 const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
851 size_t IndexShift = 0;
852 for (
size_t I = 0; I < UnPreparedArgs.size(); ++I) {
853 void *Ptr = UnPreparedArgs[I].MPtr;
855 const int &Size = UnPreparedArgs[I].MSize;
856 const int Index = UnPreparedArgs[I].MIndex;
857 processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
862 void handler::extractArgsAndReqsFromLambda(
863 char *LambdaPtr,
size_t KernelArgsNum,
864 const detail::kernel_param_desc_t *KernelArgs,
bool IsESIMD) {
865 const bool IsKernelCreatedFromSource =
false;
866 size_t IndexShift = 0;
869 for (
size_t I = 0; I < KernelArgsNum; ++I) {
870 void *Ptr = LambdaPtr + KernelArgs[I].offset;
872 const int &Size = KernelArgs[I].info;
879 AccTarget == access::target::constant_buffer) ||
882 detail::AccessorBaseHost *AccBase =
883 static_cast<detail::AccessorBaseHost *
>(Ptr);
885 }
else if (AccTarget == access::target::local) {
886 detail::LocalAccessorBaseHost *LocalAccBase =
887 static_cast<detail::LocalAccessorBaseHost *
>(Ptr);
891 processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
899 detail::string handler::getKernelName() {
900 return detail::string{MKernel->get_info<info::kernel::function_name>()};
903 void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) {
904 auto UsedKernelBundleImplPtr =
905 getOrInsertHandlerKernelBundle(
false);
906 if (!UsedKernelBundleImplPtr)
910 if (!MImpl->isStateExplicitKernelBundle())
916 if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
919 "The kernel bundle in use does not contain the kernel");
923 throwIfActionIsCreated();
925 MEventsWaitWithBarrier.resize(WaitList.size());
927 WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
928 [](
const event &Event) { return detail::getSyclObjImpl(Event); });
932 bool handler::DisableRangeRounding() {
936 bool handler::RangeRoundingTrace() {
940 void handler::GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
942 SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
943 MinFactor, GoodFactor, MinRange);
947 throwIfActionIsCreated();
948 MSrcPtr =
const_cast<void *
>(Src);
955 throwIfActionIsCreated();
957 MPattern.push_back(
static_cast<char>(Value));
964 throwIfActionIsCreated();
965 MDstPtr =
const_cast<void *
>(Ptr);
971 throwIfActionIsCreated();
972 MDstPtr =
const_cast<void *
>(Ptr);
978 void handler::ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
979 const void *Src,
size_t SrcPitch,
980 size_t Width,
size_t Height) {
982 MSrcPtr =
const_cast<void *
>(Src);
984 MImpl->MSrcPitch = SrcPitch;
985 MImpl->MDstPitch = DestPitch;
986 MImpl->MWidth = Width;
987 MImpl->MHeight = Height;
991 void handler::ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
992 const void *Value,
size_t ValueSize,
993 size_t Width,
size_t Height) {
996 MPattern.resize(ValueSize);
997 std::memcpy(MPattern.data(), Value, ValueSize);
998 MImpl->MDstPitch = DestPitch;
999 MImpl->MWidth = Width;
1000 MImpl->MHeight = Height;
1004 void handler::ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
1005 size_t Width,
size_t Height) {
1008 MPattern.push_back(
static_cast<char>(Value));
1009 MImpl->MDstPitch = DestPitch;
1010 MImpl->MWidth = Width;
1011 MImpl->MHeight = Height;
1018 throwIfGraphAssociated<
1019 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1020 sycl_ext_oneapi_bindless_images>();
1039 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1055 MImpl->MSrcOffset = {0, 0, 0};
1056 MImpl->MDestOffset = {0, 0, 0};
1059 MImpl->MImageDesc = PiDesc;
1060 MImpl->MImageFormat = PiFormat;
1061 MImpl->MImageCopyFlags =
1071 throwIfGraphAssociated<
1072 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1073 sycl_ext_oneapi_bindless_images>();
1092 DestImgDesc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1108 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1109 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1110 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1111 MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]};
1112 MImpl->MImageDesc = PiDesc;
1113 MImpl->MImageFormat = PiFormat;
1114 MImpl->MImageCopyFlags =
1122 throwIfGraphAssociated<
1123 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1124 sycl_ext_oneapi_bindless_images>();
1143 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1159 MImpl->MSrcOffset = {0, 0, 0};
1160 MImpl->MDestOffset = {0, 0, 0};
1163 MImpl->MImageDesc = PiDesc;
1164 MImpl->MImageFormat = PiFormat;
1165 MImpl->MImageCopyFlags =
1175 throwIfGraphAssociated<
1176 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1177 sycl_ext_oneapi_bindless_images>();
1196 SrcImgDesc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1212 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1213 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1214 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1215 MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]};
1216 MImpl->MImageDesc = PiDesc;
1217 MImpl->MImageFormat = PiFormat;
1218 MImpl->MImageCopyFlags =
1224 void *Src,
void *Dest,
1226 throwIfGraphAssociated<
1227 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1228 sycl_ext_oneapi_bindless_images>();
1247 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1263 MImpl->MSrcOffset = {0, 0, 0};
1264 MImpl->MDestOffset = {0, 0, 0};
1267 MImpl->MImageDesc = PiDesc;
1269 MImpl->MImageFormat = PiFormat;
1281 throwIfGraphAssociated<
1282 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1283 sycl_ext_oneapi_bindless_images>();
1302 DeviceImgDesc.
type ==
1303 sycl::ext::oneapi::experimental::image_type::cubemap
1319 MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1320 MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1321 MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]};
1322 MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1323 MImpl->MImageDesc = PiDesc;
1325 MImpl->MImageFormat = PiFormat;
1334 throwIfGraphAssociated<
1335 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1336 sycl_ext_oneapi_bindless_images>();
1337 MImpl->MInteropSemaphoreHandle =
1344 throwIfGraphAssociated<
1345 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1346 sycl_ext_oneapi_bindless_images>();
1347 MImpl->MInteropSemaphoreHandle =
1354 std::shared_ptr<detail::queue_impl> PrimaryQueue =
1355 MImpl->MSubmissionPrimaryQueue;
1356 if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.
get_context())) ||
1357 (MGraph && (MGraph->getContext() != ExecBundle.
get_context())))
1360 "Context associated with the primary queue is different from the "
1361 "context associated with the kernel bundle");
1363 std::shared_ptr<detail::queue_impl> SecondaryQueue =
1364 MImpl->MSubmissionSecondaryQueue;
1365 if (SecondaryQueue &&
1366 SecondaryQueue->get_context() != ExecBundle.
get_context())
1369 "Context associated with the secondary queue is different from the "
1370 "context associated with the kernel bundle");
1372 setStateExplicitKernelBundle();
1378 if (EventImpl->isDiscarded()) {
1380 "Queue operation cannot depend on discarded event.");
1382 if (
auto Graph = getCommandGraph(); Graph) {
1383 auto EventGraph = EventImpl->getCommandGraph();
1384 if (EventGraph ==
nullptr) {
1387 "Graph nodes cannot depend on events from outside the graph.");
1389 if (EventGraph != Graph) {
1392 "Graph nodes cannot depend on events from another graph.");
1395 CGData.MEvents.push_back(EventImpl);
1399 for (
const event &Event : Events) {
1407 auto &Plugin = ContextImpl->getPlugin();
1411 &SupportsOp,
nullptr);
1415 bool handler::supportsUSMMemcpy2D() {
1416 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1417 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1426 bool handler::supportsUSMFill2D() {
1427 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1428 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1437 bool handler::supportsUSMMemset2D() {
1438 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1439 {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
1448 id<2> handler::computeFallbackKernelBounds(
size_t Width,
size_t Height) {
1449 device Dev = MQueue->get_device();
1450 range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
1451 Dev.get_info<info::device::max_compute_units>();
1452 return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
1455 void handler::ext_intel_read_host_pipe(detail::string_view Name,
void *Ptr,
1456 size_t Size,
bool Block) {
1457 MImpl->HostPipeName = Name.data();
1458 MImpl->HostPipePtr = Ptr;
1459 MImpl->HostPipeTypeSize = Size;
1460 MImpl->HostPipeBlocking = Block;
1461 MImpl->HostPipeRead = 1;
1465 void handler::ext_intel_write_host_pipe(detail::string_view Name,
void *Ptr,
1466 size_t Size,
bool Block) {
1467 MImpl->HostPipeName = Name.data();
1468 MImpl->HostPipePtr = Ptr;
1469 MImpl->HostPipeTypeSize = Size;
1470 MImpl->HostPipeBlocking = Block;
1471 MImpl->HostPipeRead = 0;
1475 void handler::memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
1476 bool IsDeviceImageScoped,
size_t NumBytes,
1478 throwIfActionIsCreated();
1479 MSrcPtr =
const_cast<void *
>(Src);
1480 MDstPtr =
const_cast<void *
>(DeviceGlobalPtr);
1481 MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1483 MImpl->MOffset = Offset;
1487 void handler::memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
1488 bool IsDeviceImageScoped,
size_t NumBytes,
1490 throwIfActionIsCreated();
1491 MSrcPtr =
const_cast<void *
>(DeviceGlobalPtr);
1493 MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
1495 MImpl->MOffset = Offset;
1499 void handler::memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
1501 size_t DeviceGlobalTSize,
1502 bool IsDeviceImageScoped,
1503 size_t NumBytes,
size_t Offset) {
1504 std::weak_ptr<detail::context_impl> WeakContextImpl =
1505 MQueue->getContextImplPtr();
1506 std::weak_ptr<detail::device_impl> WeakDeviceImpl =
1507 MQueue->getDeviceImplPtr();
1512 std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
1513 std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
1514 if (ContextImpl && DeviceImpl)
1515 ContextImpl->memcpyToHostOnlyDeviceGlobal(
1516 DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
1517 IsDeviceImageScoped, NumBytes, Offset);
1521 void handler::memcpyFromHostOnlyDeviceGlobal(
void *Dest,
1522 const void *DeviceGlobalPtr,
1523 bool IsDeviceImageScoped,
1524 size_t NumBytes,
size_t Offset) {
1525 const std::shared_ptr<detail::context_impl> &ContextImpl =
1526 MQueue->getContextImplPtr();
1527 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1528 MQueue->getDeviceImplPtr();
1533 ContextImpl->memcpyFromHostOnlyDeviceGlobal(
1534 DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
1539 const std::shared_ptr<detail::context_impl> &
1540 handler::getContextImplPtr()
const {
1541 return MQueue->getContextImplPtr();
1544 void handler::setKernelCacheConfig(
1546 MImpl->MKernelCacheConfig = Config;
1549 void handler::setKernelIsCooperative(
bool KernelIsCooperative) {
1550 MImpl->MKernelIsCooperative = KernelIsCooperative;
1561 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1562 handler::getCommandGraph()
const {
1566 return MQueue->getCommandGraph();
1570 MImpl->MUserFacingNodeType = Type;
1573 std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
1575 std::array<size_t, 3>
PiResult = {};
1577 Dev->getHandleRef(),
1579 ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1581 if (Ret == PI_SUCCESS) {
1587 std::tuple<std::array<size_t, 3>,
bool> handler::getMaxWorkGroups_v2() {
1588 auto ImmRess = getMaxWorkGroups();
1590 return {*ImmRess,
true};
1591 return {std::array<size_t, 3>{0, 0, 0},
false};
1594 void handler::setNDRangeUsed(
bool Value) { MImpl->MNDRangeUsed = Value; }
1596 void handler::registerDynamicParameter(
1597 ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase,
1599 if (MQueue && MQueue->getCommandGraph()) {
1602 "Dynamic Parameters cannot be used with Graph Queue recording.");
1607 "Dynamic Parameters cannot be used with normal SYCL submissions");
1611 if (ParamImpl->MGraph != this->MGraph) {
1614 "Cannot use a Dynamic Parameter with a node associated with a graph "
1615 "other than the one it was created with.");
1617 MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex);
sycl::range< 3 > GlobalSize
sycl::range< 3 > NumWorkGroups
Number of workgroups, used to record the number of workgroups from the simplest form of parallel_for_...
sycl::id< 3 > GlobalOffset
sycl::range< 3 > LocalSize
static ProgramManager & getInstance()
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
bool kernelUsesAssert(const std::string &KernelName) const
static Scheduler & getInstance()
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, sycl::detail::pi::PiExtCommandBuffer CommandBuffer=nullptr, const std::vector< sycl::detail::pi::PiExtSyncPoint > &Dependencies={})
Registers a command group, and adds it to the dependency graph.
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
const char * c_str() const noexcept
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Graph in the modifiable state.
std::unique_lock< std::shared_mutex > WriteLock
Command group handler class.
void depends_on(event Event)
Registers event dependencies on this command group.
void ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue with a non-blocking wait on an external semaphore.
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
void ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
void ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
void use_kernel_bundle(const kernel_bundle< bundle_state::executable > &ExecBundle)
context get_context() const noexcept
Defines the iteration domain of either a single work-group in a parallel dispatch,...
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
bool trace(TraceLevel level)
::pi_interop_semaphore_handle PiInteropSemaphoreHandle
sycl::detail::pi::PiMemImageChannelOrder convertChannelOrder(image_channel_order Order)
void * getValueFromDynamicParameter(ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase)
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
constexpr const char * SYCL_STREAM_NAME
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
std::shared_ptr< event_impl > EventImplPtr
AccessorImplHost Requirement
pi_int32 enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< sycl::detail::pi::PiEvent > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative)
kernel_id get_kernel_id_impl(string_view KernelName)
sycl::detail::pi::PiMemImageChannelType convertChannelType(image_channel_type Type)
std::shared_ptr< AccessorImplHost > AccessorImplPtr
sycl::detail::pi::PiImageCopyFlags getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType)
node_type getNodeTypeFromCG(sycl::detail::CG::CGTYPE CGType)
@ executable
In executable state, the graph is ready to execute.
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, _Tp > arg(const complex< _Tp > &__c)
constexpr size_t MaxNumAdditionalArgs
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
static bool checkContextSupports(const std::shared_ptr< detail::context_impl > &ContextImpl, sycl::detail::pi::PiContextInfo InfoQuery)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
@ PI_IMAGE_COPY_DEVICE_TO_DEVICE
@ PI_IMAGE_COPY_DEVICE_TO_HOST
@ PI_IMAGE_COPY_HOST_TO_DEVICE
@ PI_MEM_TYPE_IMAGE_CUBEMAP
@ PI_MEM_TYPE_IMAGE1D_ARRAY
@ PI_MEM_TYPE_IMAGE2D_ARRAY
pi_result piextEnqueueCooperativeKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
C++ wrapper of extern "C" PI interfaces.
std::vector< detail::AccessorImplPtr > MAccStorage
Storage for accessors.
std::vector< detail::EventImplPtr > MEvents
List of events that order the execution of this CG.
std::vector< AccessorImplHost * > MRequirements
List of requirements that specify which memory is needed for the command group to be executed.
std::set< std::uintptr_t > MImageIdentifiers
A struct to describe the properties of an image.
image_channel_order channel_order
image_channel_type channel_type
Opaque image memory handle type.
Opaque interop semaphore handle type.
raw_handle_type raw_handle