38 inline namespace _V1 {
51 if (DstPtrType == sycl::usm::alloc::device) {
53 if (SrcPtrType == sycl::usm::alloc::device)
54 return UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE;
55 if (SrcPtrType == sycl::usm::alloc::host ||
56 SrcPtrType == sycl::usm::alloc::unknown)
57 return UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE;
59 "Unknown copy source location");
61 if (DstPtrType == sycl::usm::alloc::host ||
62 DstPtrType == sycl::usm::alloc::unknown) {
64 if (SrcPtrType == sycl::usm::alloc::device)
65 return UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST;
66 if (SrcPtrType == sycl::usm::alloc::host ||
67 SrcPtrType == sycl::usm::alloc::unknown)
69 "Cannot copy image from host to host");
71 "Unknown copy source location");
74 "Unknown copy destination location");
85 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
86 bool CallerNeedsEvent)
87 :
handler(Queue, Queue, nullptr, CallerNeedsEvent) {}
89 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
90 std::shared_ptr<detail::queue_impl> PrimaryQueue,
91 std::shared_ptr<detail::queue_impl> SecondaryQueue,
92 bool CallerNeedsEvent)
93 : impl(
std::make_shared<detail::handler_impl>(
std::move(PrimaryQueue),
94 std::move(SecondaryQueue),
96 MQueue(
std::move(Queue)) {}
99 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
100 : impl(
std::make_shared<detail::handler_impl>(Graph)) {}
105 void handler::setStateExplicitKernelBundle() {
106 impl->setStateExplicitKernelBundle();
112 void handler::setStateSpecConstSet() { impl->setStateSpecConstSet(); }
116 bool handler::isStateExplicitKernelBundle()
const {
117 return impl->isStateExplicitKernelBundle();
124 std::shared_ptr<detail::kernel_bundle_impl>
125 handler::getOrInsertHandlerKernelBundle(
bool Insert)
const {
126 if (!impl->MKernelBundle && Insert) {
128 impl->MGraph ? impl->MGraph->getContext() : MQueue->get_context();
129 auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device();
131 get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
133 return impl->MKernelBundle;
137 void handler::setHandlerKernelBundle(
138 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
139 impl->MKernelBundle = NewKernelBundleImpPtr;
142 void handler::setHandlerKernelBundle(kernel Kernel) {
146 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
148 setHandlerKernelBundle(KernelBundleImpl);
151 event handler::finalize() {
162 for (
const auto &
arg : impl->MArgs) {
168 if (AccImpl->MIsPlaceH) {
169 auto It = std::find(impl->CGData.MRequirements.begin(),
170 impl->CGData.MRequirements.end(), AccImpl);
171 if (It == impl->CGData.MRequirements.end())
173 "placeholder accessor must be bound by calling "
174 "handler::require() before it can be used.");
177 bool AccFound =
false;
178 for (detail::ArgDesc &Acc : impl->MAssociatedAccesors) {
188 "placeholder accessor must be bound by calling "
189 "handler::require() before it can be used.");
195 const auto &type = getType();
198 std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
199 getOrInsertHandlerKernelBundle(
false);
200 if (KernelBundleImpPtr) {
202 if (!KernelBundleImpPtr->isInterop() &&
203 !impl->isStateExplicitKernelBundle()) {
205 impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device();
208 MKernelName.
c_str());
209 bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
212 if (!KernelInserted &&
215 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
217 kernel_bundle<bundle_state::executable> ExecKernelBundle =
220 setHandlerKernelBundle(KernelBundleImpPtr);
221 KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
226 "Failed to add kernel to kernel bundle.");
229 switch (KernelBundleImpPtr->get_bundle_state()) {
232 kernel_bundle<bundle_state::executable> ExecBundle =
build(
234 KernelBundleImpPtr));
236 setHandlerKernelBundle(KernelBundleImpPtr);
244 assert(0 &&
"Expected that the bundle is either in input or executable "
250 if (MQueue && !impl->MGraph && !impl->MSubgraphNode &&
251 !MQueue->getCommandGraph() && !MQueue->is_in_fusion_mode() &&
252 !impl->CGData.MRequirements.size() && !MStreamStorage.size() &&
253 (!impl->CGData.MEvents.size() ||
254 (MQueue->isInOrder() &&
256 impl->CGData.MEvents, MQueue->getContextImplPtr())))) {
262 std::vector<ur_event_handle_t> RawEvents;
265 #ifdef XPTI_ENABLE_INSTRUMENTATION
268 auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
269 StreamID, MKernel, MCodeLoc, MKernelName.
c_str(), MQueue,
270 impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs);
271 auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
272 InstanceID = InstanceID]() {
274 auto EnqueueKernel = [&]() {
276 #ifdef XPTI_ENABLE_INSTRUMENTATION
277 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
278 xpti::trace_task_begin,
nullptr);
280 const detail::RTDeviceBinaryImage *BinImage =
nullptr;
284 assert(BinImage &&
"Failed to obtain a binary image.");
287 KernelBundleImpPtr, MKernel, MKernelName.
c_str(),
288 RawEvents, NewEvent,
nullptr, impl->MKernelCacheConfig,
289 impl->MKernelIsCooperative,
290 impl->MKernelUsesClusterLaunch, BinImage);
291 #ifdef XPTI_ENABLE_INSTRUMENTATION
293 if (NewEvent !=
nullptr) {
294 detail::emitInstrumentationGeneral(
295 StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
296 static_cast<const void *
>(NewEvent->getHandleRef()));
298 detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
299 xpti::trace_task_end,
nullptr);
303 bool DiscardEvent = (MQueue->MDiscardEvents || !impl->MEventNeeded) &&
304 MQueue->supportsDiscardingPiEvents();
307 bool KernelUsesAssert =
308 !(MKernel && MKernel->isInterop()) &&
310 MKernelName.
c_str());
311 DiscardEvent = !KernelUsesAssert;
316 auto EventImpl = std::make_shared<detail::event_impl>(
318 MLastEvent = detail::createSyclObjFromImpl<event>(EventImpl);
320 NewEvent = std::make_shared<detail::event_impl>(MQueue);
321 NewEvent->setWorkerQueue(MQueue);
322 NewEvent->setContextImpl(MQueue->getContextImplPtr());
323 NewEvent->setStateIncomplete();
324 NewEvent->setSubmissionTime();
327 if (NewEvent->isHost() || NewEvent->getHandleRef() ==
nullptr)
328 NewEvent->setComplete();
329 NewEvent->setEnqueued();
331 MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
337 std::unique_ptr<detail::CG> CommandGroup;
343 CommandGroup.reset(
new detail::CGExecKernel(
344 std::move(impl->MNDRDesc), std::move(MHostKernel), std::move(MKernel),
345 std::move(impl->MKernelBundle), std::move(impl->CGData),
346 std::move(impl->MArgs), MKernelName.
c_str(), std::move(MStreamStorage),
347 std::move(impl->MAuxiliaryResources), getType(),
348 impl->MKernelCacheConfig, impl->MKernelIsCooperative,
349 impl->MKernelUsesClusterLaunch, MCodeLoc));
356 new detail::CGCopy(getType(), MSrcPtr, MDstPtr, std::move(impl->CGData),
357 std::move(impl->MAuxiliaryResources), MCodeLoc));
360 CommandGroup.reset(
new detail::CGFill(std::move(MPattern), MDstPtr,
361 std::move(impl->CGData), MCodeLoc));
365 new detail::CGUpdateHost(MDstPtr, std::move(impl->CGData), MCodeLoc));
368 CommandGroup.reset(
new detail::CGCopyUSM(
369 MSrcPtr, MDstPtr, MLength, std::move(impl->CGData), MCodeLoc));
372 CommandGroup.reset(
new detail::CGFillUSM(std::move(MPattern), MDstPtr,
373 MLength, std::move(impl->CGData),
377 CommandGroup.reset(
new detail::CGPrefetchUSM(
378 MDstPtr, MLength, std::move(impl->CGData), MCodeLoc));
381 CommandGroup.reset(
new detail::CGAdviseUSM(MDstPtr, MLength, impl->MAdvice,
382 std::move(impl->CGData),
383 getType(), MCodeLoc));
386 CommandGroup.reset(
new detail::CGCopy2DUSM(
387 MSrcPtr, MDstPtr, impl->MSrcPitch, impl->MDstPitch, impl->MWidth,
388 impl->MHeight, std::move(impl->CGData), MCodeLoc));
391 CommandGroup.reset(
new detail::CGFill2DUSM(
392 std::move(MPattern), MDstPtr, impl->MDstPitch, impl->MWidth,
393 impl->MHeight, std::move(impl->CGData), MCodeLoc));
396 CommandGroup.reset(
new detail::CGMemset2DUSM(
397 MPattern[0], MDstPtr, impl->MDstPitch, impl->MWidth, impl->MHeight,
398 std::move(impl->CGData), MCodeLoc));
402 auto context = impl->MGraph
404 : MQueue->getContextImplPtr();
405 CommandGroup.reset(
new detail::CGHostTask(
406 std::move(impl->MHostTask), MQueue, context, std::move(impl->MArgs),
407 std::move(impl->CGData), getType(), MCodeLoc));
412 if (
auto GraphImpl = getCommandGraph(); GraphImpl !=
nullptr) {
413 impl->CGData.MEvents.insert(std::end(impl->CGData.MEvents),
414 std::begin(impl->MEventsWaitWithBarrier),
415 std::end(impl->MEventsWaitWithBarrier));
420 std::move(impl->CGData), MCodeLoc));
423 new detail::CGBarrier(std::move(impl->MEventsWaitWithBarrier),
424 std::move(impl->CGData), getType(), MCodeLoc));
430 new detail::CGProfilingTag(std::move(impl->CGData), MCodeLoc));
434 CommandGroup.reset(
new detail::CGCopyToDeviceGlobal(
435 MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset,
436 std::move(impl->CGData), MCodeLoc));
440 CommandGroup.reset(
new detail::CGCopyFromDeviceGlobal(
441 MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset,
442 std::move(impl->CGData), MCodeLoc));
446 CommandGroup.reset(
new detail::CGReadWriteHostPipe(
447 impl->HostPipeName, impl->HostPipeBlocking, impl->HostPipePtr,
448 impl->HostPipeTypeSize, impl->HostPipeRead, std::move(impl->CGData),
453 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph =
454 MQueue ? MQueue->getCommandGraph() : impl->MGraph;
462 ParentGraph->MMutex);
464 impl->CGData.MRequirements = impl->MExecGraph->getRequirements();
468 CommandGroup.reset(
new sycl::detail::CGExecCommandBuffer(
469 nullptr, impl->MExecGraph, std::move(impl->CGData)));
472 event GraphCompletionEvent =
473 impl->MExecGraph->enqueue(MQueue, std::move(impl->CGData));
474 MLastEvent = GraphCompletionEvent;
479 CommandGroup.reset(
new detail::CGCopyImage(
480 MSrcPtr, MDstPtr, impl->MSrcImageDesc, impl->MDstImageDesc,
481 impl->MSrcImageFormat, impl->MDstImageFormat, impl->MImageCopyFlags,
482 impl->MSrcOffset, impl->MDestOffset, impl->MCopyExtent,
483 std::move(impl->CGData), MCodeLoc));
486 CommandGroup.reset(
new detail::CGSemaphoreWait(
487 impl->MInteropSemaphoreHandle, impl->MWaitValue,
488 std::move(impl->CGData), MCodeLoc));
491 CommandGroup.reset(
new detail::CGSemaphoreSignal(
492 impl->MInteropSemaphoreHandle, impl->MSignalValue,
493 std::move(impl->CGData), MCodeLoc));
497 std::cout <<
"WARNING: An empty command group is submitted." << std::endl;
503 if (impl->MGraph || (MQueue && MQueue->getCommandGraph())) {
505 std::move(impl->CGData), MCodeLoc));
508 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
516 "Internal Error. Command group cannot be constructed.");
522 impl->MGraphNodeCG = std::move(CommandGroup);
523 return detail::createSyclObjFromImpl<event>(
524 std::make_shared<detail::event_impl>());
529 if (
auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
530 auto EventImpl = std::make_shared<detail::event_impl>();
531 EventImpl->setSubmittedQueue(MQueue);
532 std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
542 ? impl->MUserFacingNodeType
546 if (MQueue->isInOrder()) {
550 auto DependentNode = GraphImpl->getLastInorderNode(MQueue);
552 NodeImpl = DependentNode
553 ? GraphImpl->add(NodeType, std::move(CommandGroup),
555 : GraphImpl->add(NodeType, std::move(CommandGroup));
560 GraphImpl->setLastInorderNode(MQueue, NodeImpl);
562 auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue);
563 if (LastBarrierRecordedFromQueue) {
564 NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup),
565 {LastBarrierRecordedFromQueue});
567 NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
570 if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) {
571 GraphImpl->setBarrierDep(MQueue, NodeImpl);
576 GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl);
578 NodeImpl->MNDRangeUsed = impl->MNDRangeUsed;
580 return detail::createSyclObjFromImpl<event>(EventImpl);
584 std::move(CommandGroup), std::move(MQueue), impl->MEventNeeded);
586 MLastEvent = detail::createSyclObjFromImpl<event>(Event);
590 void handler::addReduction(
const std::shared_ptr<const void> &ReduObj) {
591 impl->MAuxiliaryResources.push_back(ReduObj);
596 if (getCommandGraph() &&
597 static_cast<detail::SYCLMemObjT *
>(AccImpl->MSYCLMemObj)
598 ->needsWriteBack()) {
600 "Accessors to buffers which have write_back enabled "
601 "are not allowed to be used in command graphs.");
604 if (Req->MAccessMode != sycl::access_mode::read) {
605 auto SYCLMemObj =
static_cast<detail::SYCLMemObjT *
>(Req->MSYCLMemObj);
606 SYCLMemObj->handleWriteAccessorCreation();
609 if (Req->MAccessRange.size() != 0)
610 impl->CGData.MRequirements.push_back(Req);
612 impl->CGData.MAccStorage.push_back(std::move(AccImpl));
615 impl->MAssociatedAccesors.emplace_back(
619 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
622 static_cast<int>(AccTarget));
625 void handler::associateWithHandler(
626 detail::UnsampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
628 static_cast<int>(AccTarget));
631 void handler::associateWithHandler(
632 detail::SampledImageAccessorBaseHost *AccBase,
image_target AccTarget) {
634 static_cast<int>(AccTarget));
638 size_t &IndexShift,
int Size,
639 bool IsKernelCreatedFromSource,
641 std::vector<detail::ArgDesc> &Args,
645 AccImpl->resize(GlobalSize);
647 Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
653 if (!isESIMD && !IsKernelCreatedFromSource) {
656 const size_t SizeAccField =
657 sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
659 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
660 &AccImpl->MAccessRange[0], SizeAccField,
663 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
664 &AccImpl->MMemoryRange[0], SizeAccField,
667 Args.emplace_back(kernel_param_kind_t::kind_std_layout,
668 &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
673 const int Size,
const size_t Index,
size_t &IndexShift,
674 bool IsKernelCreatedFromSource,
bool IsESIMD) {
678 case kernel_param_kind_t::kind_std_layout:
679 case kernel_param_kind_t::kind_pointer: {
680 addArg(Kind, Ptr, Size, Index + IndexShift);
683 case kernel_param_kind_t::kind_stream: {
687 detail::AccessorBaseHost *GBufBase =
688 static_cast<detail::AccessorBaseHost *
>(&S->GlobalBuf);
692 GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource,
693 impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD);
695 detail::AccessorBaseHost *GOffsetBase =
696 static_cast<detail::AccessorBaseHost *
>(&S->GlobalOffset);
700 GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource,
701 impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD);
703 detail::AccessorBaseHost *GFlushBase =
704 static_cast<detail::AccessorBaseHost *
>(&S->GlobalFlushBuf);
708 size_t GlobalSize = impl->MNDRDesc.GlobalSize.size();
714 if (GlobalSize == 0) {
716 GlobalSize = impl->MNDRDesc.NumWorkGroups.size();
719 IsKernelCreatedFromSource, GlobalSize, impl->MArgs,
722 addArg(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize,
723 sizeof(S->FlushBufferSize), Index + IndexShift);
727 case kernel_param_kind_t::kind_accessor: {
734 case access::target::constant_buffer: {
737 AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource,
738 impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD);
741 case access::target::local: {
742 detail::LocalAccessorImplHost *LAcc =
743 static_cast<detail::LocalAccessorImplHost *
>(Ptr);
745 range<3> &Size = LAcc->MSize;
746 const int Dims = LAcc->MDims;
747 int SizeInBytes = LAcc->MElemSize;
748 for (
int I = 0; I < Dims; ++I)
749 SizeInBytes *= Size[I];
752 SizeInBytes = std::max(SizeInBytes, 1);
753 impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
nullptr,
754 SizeInBytes, Index + IndexShift);
758 if (!IsESIMD && !IsKernelCreatedFromSource) {
760 const size_t SizeAccField = (Dims == 0 ? 1 : Dims) *
sizeof(Size[0]);
761 addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField,
764 addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField,
767 addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField,
775 addArg(Kind, AccImpl, Size, Index + IndexShift);
776 if (!IsKernelCreatedFromSource) {
784 case access::target::host_buffer: {
786 "Unsupported accessor target case.");
792 case kernel_param_kind_t::kind_sampler: {
793 addArg(kernel_param_kind_t::kind_sampler, Ptr,
sizeof(sampler),
797 case kernel_param_kind_t::kind_specialization_constants_buffer: {
798 addArg(kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
802 case kernel_param_kind_t::kind_invalid:
804 "Invalid kernel param kind");
819 void handler::extractArgsAndReqs() {
820 assert(MKernel &&
"MKernel is not initialized");
821 std::vector<detail::ArgDesc> UnPreparedArgs = std::move(impl->MArgs);
825 UnPreparedArgs.begin(), UnPreparedArgs.end(),
826 [](
const detail::ArgDesc &first,
const detail::ArgDesc &second) ->
bool {
827 return (first.MIndex < second.MIndex);
830 const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
833 size_t IndexShift = 0;
834 for (
size_t I = 0; I < UnPreparedArgs.size(); ++I) {
835 void *Ptr = UnPreparedArgs[I].MPtr;
837 const int &Size = UnPreparedArgs[I].MSize;
838 const int Index = UnPreparedArgs[I].MIndex;
839 processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
844 void handler::extractArgsAndReqsFromLambda(
845 char *LambdaPtr,
size_t KernelArgsNum,
846 const detail::kernel_param_desc_t *KernelArgs,
bool IsESIMD) {
847 const bool IsKernelCreatedFromSource =
false;
848 size_t IndexShift = 0;
851 for (
size_t I = 0; I < KernelArgsNum; ++I) {
852 void *Ptr = LambdaPtr + KernelArgs[I].offset;
854 const int &Size = KernelArgs[I].info;
861 AccTarget == access::target::constant_buffer) ||
864 detail::AccessorBaseHost *AccBase =
865 static_cast<detail::AccessorBaseHost *
>(Ptr);
867 }
else if (AccTarget == access::target::local) {
868 detail::LocalAccessorBaseHost *LocalAccBase =
869 static_cast<detail::LocalAccessorBaseHost *
>(Ptr);
873 processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
881 detail::string handler::getKernelName() {
882 return detail::string{MKernel->get_info<info::kernel::function_name>()};
885 void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) {
886 auto UsedKernelBundleImplPtr =
887 getOrInsertHandlerKernelBundle(
false);
888 if (!UsedKernelBundleImplPtr)
892 if (!impl->isStateExplicitKernelBundle())
896 device Dev = impl->MGraph ? impl->MGraph->getDevice()
898 if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
901 "The kernel bundle in use does not contain the kernel");
905 throwIfActionIsCreated();
907 impl->MEventsWaitWithBarrier.reserve(WaitList.size());
908 for (
auto &Event : WaitList) {
912 if (EventImpl->isHost()) {
915 impl->MEventsWaitWithBarrier.push_back(EventImpl);
920 bool handler::DisableRangeRounding() {
924 bool handler::RangeRoundingTrace() {
928 void handler::GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
930 SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
931 MinFactor, GoodFactor, MinRange);
935 throwIfActionIsCreated();
936 MSrcPtr =
const_cast<void *
>(Src);
943 throwIfActionIsCreated();
945 MPattern.push_back(
static_cast<char>(Value));
952 throwIfActionIsCreated();
953 MDstPtr =
const_cast<void *
>(Ptr);
959 throwIfActionIsCreated();
960 MDstPtr =
const_cast<void *
>(Ptr);
962 impl->MAdvice =
static_cast<ur_usm_advice_flags_t
>(Advice);
966 void handler::fill_impl(
void *Dest,
const void *Value,
size_t ValueSize,
969 MPattern.resize(ValueSize);
970 std::memcpy(MPattern.data(), Value, ValueSize);
971 MLength = Count * ValueSize;
975 void handler::ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
976 const void *Src,
size_t SrcPitch,
977 size_t Width,
size_t Height) {
979 MSrcPtr =
const_cast<void *
>(Src);
981 impl->MSrcPitch = SrcPitch;
982 impl->MDstPitch = DestPitch;
983 impl->MWidth = Width;
984 impl->MHeight = Height;
988 void handler::ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
989 const void *Value,
size_t ValueSize,
990 size_t Width,
size_t Height) {
993 MPattern.resize(ValueSize);
994 std::memcpy(MPattern.data(), Value, ValueSize);
995 impl->MDstPitch = DestPitch;
996 impl->MWidth = Width;
997 impl->MHeight = Height;
1001 void handler::ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
1002 size_t Width,
size_t Height) {
1005 MPattern.push_back(
static_cast<unsigned char>(Value));
1006 impl->MDstPitch = DestPitch;
1007 impl->MWidth = Width;
1008 impl->MHeight = Height;
1015 throwIfGraphAssociated<
1016 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1017 sycl_ext_oneapi_bindless_images>();
1020 MSrcPtr =
const_cast<void *
>(Src);
1021 MDstPtr =
reinterpret_cast<void *
>(Dest.
raw_handle);
1023 ur_image_desc_t UrDesc = {};
1024 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1025 UrDesc.width = Desc.
width;
1026 UrDesc.height = Desc.
height;
1027 UrDesc.depth = Desc.
depth;
1033 Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY;
1037 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1038 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1041 UrDesc.type = Desc.
depth > 0 ? UR_MEM_TYPE_IMAGE3D
1042 : (Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1043 : UR_MEM_TYPE_IMAGE1D);
1046 ur_image_format_t UrFormat;
1047 UrFormat.channelType =
1053 impl->MSrcOffset = {0, 0, 0};
1054 impl->MDestOffset = {0, 0, 0};
1056 impl->MSrcImageDesc = UrDesc;
1057 impl->MDstImageDesc = UrDesc;
1058 impl->MSrcImageFormat = UrFormat;
1059 impl->MDstImageFormat = UrFormat;
1060 impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE;
1069 throwIfGraphAssociated<
1070 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1071 sycl_ext_oneapi_bindless_images>();
1074 MSrcPtr =
const_cast<void *
>(Src);
1075 MDstPtr =
reinterpret_cast<void *
>(Dest.
raw_handle);
1077 ur_image_desc_t UrDesc = {};
1078 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1079 UrDesc.width = DestImgDesc.
width;
1080 UrDesc.height = DestImgDesc.
height;
1081 UrDesc.depth = DestImgDesc.
depth;
1086 UrDesc.type = DestImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY
1087 : UR_MEM_TYPE_IMAGE1D_ARRAY;
1091 DestImgDesc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1092 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1095 UrDesc.type = DestImgDesc.
depth > 0
1096 ? UR_MEM_TYPE_IMAGE3D
1097 : (DestImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1098 : UR_MEM_TYPE_IMAGE1D);
1101 ur_image_format_t UrFormat;
1102 UrFormat.channelType =
1108 impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1109 impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1110 impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1111 impl->MSrcImageDesc = UrDesc;
1112 impl->MSrcImageDesc.width = SrcExtent[0];
1113 impl->MSrcImageDesc.height = SrcExtent[1];
1114 impl->MSrcImageDesc.depth = SrcExtent[2];
1115 impl->MDstImageDesc = UrDesc;
1116 impl->MSrcImageFormat = UrFormat;
1117 impl->MDstImageFormat = UrFormat;
1118 impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE;
1125 throwIfGraphAssociated<
1126 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1127 sycl_ext_oneapi_bindless_images>();
1130 MSrcPtr =
reinterpret_cast<void*
>(Src.
raw_handle);
1133 ur_image_desc_t UrDesc = {};
1134 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1135 UrDesc.width = Desc.
width;
1136 UrDesc.height = Desc.
height;
1137 UrDesc.depth = Desc.
depth;
1143 Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY;
1147 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1148 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1151 UrDesc.type = Desc.
depth > 0 ? UR_MEM_TYPE_IMAGE3D
1152 : (Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1153 : UR_MEM_TYPE_IMAGE1D);
1156 ur_image_format_t UrFormat;
1157 UrFormat.channelType =
1163 impl->MSrcOffset = {0, 0, 0};
1164 impl->MDestOffset = {0, 0, 0};
1166 impl->MSrcImageDesc = UrDesc;
1167 impl->MDstImageDesc = UrDesc;
1168 impl->MSrcImageFormat = UrFormat;
1169 impl->MDstImageFormat = UrFormat;
1170 impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST;
1178 throwIfGraphAssociated<
1179 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1180 sycl_ext_oneapi_bindless_images>();
1183 MSrcPtr =
reinterpret_cast<void*
>(Src.
raw_handle);
1184 MDstPtr =
reinterpret_cast<void*
>(Dest.
raw_handle);
1186 ur_image_desc_t UrDesc = {};
1187 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1188 UrDesc.width = ImageDesc.
width;
1189 UrDesc.height = ImageDesc.
height;
1190 UrDesc.depth = ImageDesc.
depth;
1194 UrDesc.type = ImageDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY
1195 : UR_MEM_TYPE_IMAGE1D_ARRAY;
1199 ImageDesc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1200 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1203 UrDesc.type = ImageDesc.
depth > 0
1204 ? UR_MEM_TYPE_IMAGE3D
1205 : (ImageDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1206 : UR_MEM_TYPE_IMAGE1D);
1209 ur_image_format_t UrFormat;
1210 UrFormat.channelType =
1216 impl->MSrcOffset = {0, 0, 0};
1217 impl->MDestOffset = {0, 0, 0};
1219 impl->MSrcImageDesc = UrDesc;
1220 impl->MDstImageDesc = UrDesc;
1221 impl->MSrcImageFormat = UrFormat;
1222 impl->MDstImageFormat = UrFormat;
1223 impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE;
1233 throwIfGraphAssociated<
1234 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1235 sycl_ext_oneapi_bindless_images>();
1238 MSrcPtr =
reinterpret_cast<void*
>(Src.
raw_handle);
1241 ur_image_desc_t UrDesc = {};
1242 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1243 UrDesc.width = SrcImgDesc.
width;
1244 UrDesc.height = SrcImgDesc.
height;
1245 UrDesc.depth = SrcImgDesc.
depth;
1250 UrDesc.type = SrcImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY
1251 : UR_MEM_TYPE_IMAGE1D_ARRAY;
1255 SrcImgDesc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1256 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1259 UrDesc.type = SrcImgDesc.
depth > 0
1260 ? UR_MEM_TYPE_IMAGE3D
1261 : (SrcImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1262 : UR_MEM_TYPE_IMAGE1D);
1265 ur_image_format_t UrFormat;
1266 UrFormat.channelType =
1272 impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1273 impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1274 impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1275 impl->MSrcImageDesc = UrDesc;
1276 impl->MDstImageDesc = UrDesc;
1277 impl->MDstImageDesc.width = DestExtent[0];
1278 impl->MDstImageDesc.height = DestExtent[1];
1279 impl->MDstImageDesc.depth = DestExtent[2];
1280 impl->MSrcImageFormat = UrFormat;
1281 impl->MDstImageFormat = UrFormat;
1282 impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST;
1287 const void *Src,
void *Dest,
1289 throwIfGraphAssociated<
1290 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1291 sycl_ext_oneapi_bindless_images>();
1294 MSrcPtr =
const_cast<void *
>(Src);
1297 ur_image_desc_t UrDesc = {};
1298 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1299 UrDesc.width = Desc.
width;
1300 UrDesc.height = Desc.
height;
1301 UrDesc.depth = Desc.
depth;
1307 Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY;
1311 Desc.
type == sycl::ext::oneapi::experimental::image_type::cubemap
1312 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1315 UrDesc.type = Desc.
depth > 0 ? UR_MEM_TYPE_IMAGE3D
1316 : (Desc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1317 : UR_MEM_TYPE_IMAGE1D);
1320 ur_image_format_t UrFormat;
1321 UrFormat.channelType =
1327 impl->MSrcOffset = {0, 0, 0};
1328 impl->MDestOffset = {0, 0, 0};
1330 impl->MSrcImageDesc = UrDesc;
1331 impl->MDstImageDesc = UrDesc;
1332 impl->MSrcImageFormat = UrFormat;
1333 impl->MDstImageFormat = UrFormat;
1334 impl->MSrcImageDesc.rowPitch = Pitch;
1335 impl->MDstImageDesc.rowPitch = Pitch;
1348 throwIfGraphAssociated<
1349 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1350 sycl_ext_oneapi_bindless_images>();
1353 MSrcPtr =
const_cast<void *
>(Src);
1356 ur_image_desc_t UrDesc = {};
1357 UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
1358 UrDesc.width = DeviceImgDesc.
width;
1359 UrDesc.height = DeviceImgDesc.
height;
1360 UrDesc.depth = DeviceImgDesc.
depth;
1365 UrDesc.type = DeviceImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY
1366 : UR_MEM_TYPE_IMAGE1D_ARRAY;
1369 UrDesc.type = DeviceImgDesc.
type ==
1370 sycl::ext::oneapi::experimental::image_type::cubemap
1371 ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
1374 UrDesc.type = DeviceImgDesc.
depth > 0
1375 ? UR_MEM_TYPE_IMAGE3D
1376 : (DeviceImgDesc.
height > 0 ? UR_MEM_TYPE_IMAGE2D
1377 : UR_MEM_TYPE_IMAGE1D);
1380 ur_image_format_t UrFormat;
1381 UrFormat.channelType =
1387 impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
1388 impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
1389 impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
1390 impl->MSrcImageFormat = UrFormat;
1391 impl->MDstImageFormat = UrFormat;
1395 impl->MSrcImageDesc = UrDesc;
1396 impl->MDstImageDesc = UrDesc;
1399 if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
1400 impl->MDstImageDesc.rowPitch = DeviceRowPitch;
1401 impl->MSrcImageDesc.rowPitch = 0;
1402 impl->MSrcImageDesc.width = HostExtent[0];
1403 impl->MSrcImageDesc.height = HostExtent[1];
1404 impl->MSrcImageDesc.depth = HostExtent[2];
1405 }
else if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
1406 impl->MSrcImageDesc.rowPitch = DeviceRowPitch;
1407 impl->MDstImageDesc.rowPitch = 0;
1408 impl->MDstImageDesc.width = HostExtent[0];
1409 impl->MDstImageDesc.height = HostExtent[1];
1410 impl->MDstImageDesc.depth = HostExtent[2];
1412 impl->MDstImageDesc.rowPitch = DeviceRowPitch;
1413 impl->MSrcImageDesc.rowPitch = DeviceRowPitch;
1421 throwIfGraphAssociated<
1422 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1423 sycl_ext_oneapi_bindless_images>();
1432 "Invalid type of semaphore for this operation. The "
1433 "type of semaphore used needs a user passed wait value.");
1435 impl->MInteropSemaphoreHandle =
1436 (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.
raw_handle;
1437 impl->MWaitValue = {};
1443 uint64_t WaitValue) {
1444 throwIfGraphAssociated<
1445 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1446 sycl_ext_oneapi_bindless_images>();
1449 win32_nt_dx12_fence) {
1452 "Invalid type of semaphore for this operation. The "
1453 "type of semaphore does not support user passed wait values.");
1455 impl->MInteropSemaphoreHandle =
1456 (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.
raw_handle;
1457 impl->MWaitValue = WaitValue;
1463 throwIfGraphAssociated<
1464 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1465 sycl_ext_oneapi_bindless_images>();
1474 "Invalid type of semaphore for this operation. The "
1475 "type of semaphore used needs a user passed signal value.");
1477 impl->MInteropSemaphoreHandle =
1478 (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.
raw_handle;
1479 impl->MSignalValue = {};
1485 uint64_t SignalValue) {
1486 throwIfGraphAssociated<
1487 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
1488 sycl_ext_oneapi_bindless_images>();
1491 win32_nt_dx12_fence) {
1494 "Invalid type of semaphore for this operation. The "
1495 "type of semaphore does not support user passed signal values.");
1497 impl->MInteropSemaphoreHandle =
1498 (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.
raw_handle;
1499 impl->MSignalValue = SignalValue;
1505 std::shared_ptr<detail::queue_impl> PrimaryQueue =
1506 impl->MSubmissionPrimaryQueue;
1507 if ((!impl->MGraph &&
1508 (PrimaryQueue->get_context() != ExecBundle.
get_context())) ||
1510 (impl->MGraph->getContext() != ExecBundle.
get_context())))
1513 "Context associated with the primary queue is different from the "
1514 "context associated with the kernel bundle");
1516 std::shared_ptr<detail::queue_impl> SecondaryQueue =
1517 impl->MSubmissionSecondaryQueue;
1518 if (SecondaryQueue &&
1519 SecondaryQueue->get_context() != ExecBundle.
get_context())
1522 "Context associated with the secondary queue is different from the "
1523 "context associated with the kernel bundle");
1525 setStateExplicitKernelBundle();
1535 for (
const event &Event : Events) {
1543 if (EventImpl->isDiscarded()) {
1545 "Queue operation cannot depend on discarded event.");
1547 if (
auto Graph = getCommandGraph(); Graph) {
1548 auto EventGraph = EventImpl->getCommandGraph();
1549 if (EventGraph ==
nullptr) {
1552 "Graph nodes cannot depend on events from outside the graph.");
1554 if (EventGraph != Graph) {
1557 "Graph nodes cannot depend on events from another graph.");
1560 impl->CGData.MEvents.push_back(EventImpl);
1571 ur_context_info_t InfoQuery) {
1572 auto &Plugin = ContextImpl->getPlugin();
1573 ur_bool_t SupportsOp =
false;
1574 Plugin->call(urContextGetInfo, ContextImpl->getHandleRef(), InfoQuery,
1575 sizeof(ur_bool_t), &SupportsOp,
nullptr);
1579 void handler::verifyDeviceHasProgressGuarantee(
1584 using forward_progress =
1586 auto deviceImplPtr = MQueue->getDeviceImplPtr();
1587 const bool supported = deviceImplPtr->supportsForwardProgress(
1588 guarantee, threadScope, coordinationScope);
1589 if (threadScope == execution_scope::work_group) {
1592 sycl::errc::feature_not_supported,
1593 "Required progress guarantee for work groups is not "
1594 "supported by this device.");
1605 if (guarantee == forward_progress::concurrent)
1606 setKernelIsCooperative(
true);
1607 }
else if (threadScope == execution_scope::sub_group) {
1610 "Required progress guarantee for sub groups is not "
1611 "supported by this device.");
1614 if (guarantee == forward_progress::concurrent)
1615 setKernelIsCooperative(
true);
1620 "Required progress guarantee for work items is not "
1621 "supported by this device.");
1626 bool handler::supportsUSMMemcpy2D() {
1627 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1628 {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) {
1631 UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT))
1637 bool handler::supportsUSMFill2D() {
1638 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1639 {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) {
1641 UR_CONTEXT_INFO_USM_FILL2D_SUPPORT))
1647 bool handler::supportsUSMMemset2D() {
1648 for (
const std::shared_ptr<detail::queue_impl> &QueueImpl :
1649 {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) {
1651 UR_CONTEXT_INFO_USM_FILL2D_SUPPORT))
1657 id<2> handler::computeFallbackKernelBounds(
size_t Width,
size_t Height) {
1658 device Dev = MQueue->get_device();
1659 range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
1660 Dev.get_info<info::device::max_compute_units>();
1661 return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
1664 backend handler::getDeviceBackend()
const {
1666 return impl->MGraph->getDevice().get_backend();
1668 return MQueue->getDeviceImplPtr()->getBackend();
1671 void handler::ext_intel_read_host_pipe(detail::string_view Name,
void *Ptr,
1672 size_t Size,
bool Block) {
1673 impl->HostPipeName = Name.data();
1674 impl->HostPipePtr = Ptr;
1675 impl->HostPipeTypeSize = Size;
1676 impl->HostPipeBlocking = Block;
1677 impl->HostPipeRead = 1;
1681 void handler::ext_intel_write_host_pipe(detail::string_view Name,
void *Ptr,
1682 size_t Size,
bool Block) {
1683 impl->HostPipeName = Name.data();
1684 impl->HostPipePtr = Ptr;
1685 impl->HostPipeTypeSize = Size;
1686 impl->HostPipeBlocking = Block;
1687 impl->HostPipeRead = 0;
1691 void handler::memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
1692 bool IsDeviceImageScoped,
size_t NumBytes,
1694 throwIfActionIsCreated();
1695 MSrcPtr =
const_cast<void *
>(Src);
1696 MDstPtr =
const_cast<void *
>(DeviceGlobalPtr);
1697 impl->MIsDeviceImageScoped = IsDeviceImageScoped;
1699 impl->MOffset = Offset;
1703 void handler::memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
1704 bool IsDeviceImageScoped,
size_t NumBytes,
1706 throwIfActionIsCreated();
1707 MSrcPtr =
const_cast<void *
>(DeviceGlobalPtr);
1709 impl->MIsDeviceImageScoped = IsDeviceImageScoped;
1711 impl->MOffset = Offset;
1715 void handler::memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
1717 size_t DeviceGlobalTSize,
1718 bool IsDeviceImageScoped,
1719 size_t NumBytes,
size_t Offset) {
1720 std::weak_ptr<detail::context_impl> WeakContextImpl =
1721 MQueue->getContextImplPtr();
1722 std::weak_ptr<detail::device_impl> WeakDeviceImpl =
1723 MQueue->getDeviceImplPtr();
1728 std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
1729 std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
1730 if (ContextImpl && DeviceImpl)
1731 ContextImpl->memcpyToHostOnlyDeviceGlobal(
1732 DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
1733 IsDeviceImageScoped, NumBytes, Offset);
1737 void handler::memcpyFromHostOnlyDeviceGlobal(
void *Dest,
1738 const void *DeviceGlobalPtr,
1739 bool IsDeviceImageScoped,
1740 size_t NumBytes,
size_t Offset) {
1741 const std::shared_ptr<detail::context_impl> &ContextImpl =
1742 MQueue->getContextImplPtr();
1743 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1744 MQueue->getDeviceImplPtr();
1749 ContextImpl->memcpyFromHostOnlyDeviceGlobal(
1750 DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
1755 const std::shared_ptr<detail::context_impl> &
1756 handler::getContextImplPtr()
const {
1757 return MQueue->getContextImplPtr();
1760 void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) {
1762 case handler::StableKernelCacheConfig::Default:
1763 impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT;
1765 case handler::StableKernelCacheConfig::LargeSLM:
1766 impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
1768 case handler::StableKernelCacheConfig::LargeData:
1769 impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
1774 void handler::setKernelIsCooperative(
bool KernelIsCooperative) {
1775 impl->MKernelIsCooperative = KernelIsCooperative;
1778 void handler::setKernelClusterLaunch(
sycl::range<3> ClusterSize,
int Dims) {
1779 throwIfGraphAssociated<
1780 syclex::detail::UnsupportedGraphFeatures::
1781 sycl_ext_oneapi_experimental_cuda_cluster_launch>();
1782 impl->MKernelUsesClusterLaunch =
true;
1783 impl->MNDRDesc.setClusterDimensions(ClusterSize, Dims);
1794 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1795 handler::getCommandGraph()
const {
1797 return impl->MGraph;
1799 return MQueue->getCommandGraph();
1803 impl->MUserFacingNodeType = Type;
1806 std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
1808 std::array<size_t, 3> UrResult = {};
1809 auto Ret = Dev->getPlugin()->call_nocheck(
1810 urDeviceGetInfo, Dev->getHandleRef(),
1812 ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1813 sizeof(UrResult), &UrResult,
nullptr);
1814 if (Ret == UR_RESULT_SUCCESS) {
1820 std::tuple<std::array<size_t, 3>,
bool> handler::getMaxWorkGroups_v2() {
1821 auto ImmRess = getMaxWorkGroups();
1823 return {*ImmRess,
true};
1824 return {std::array<size_t, 3>{0, 0, 0},
false};
1827 void handler::setNDRangeUsed(
bool Value) { impl->MNDRangeUsed = Value; }
1829 void handler::registerDynamicParameter(
1830 ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase,
1832 if (MQueue && MQueue->getCommandGraph()) {
1835 "Dynamic Parameters cannot be used with Graph Queue recording.");
1837 if (!impl->MGraph) {
1840 "Dynamic Parameters cannot be used with normal SYCL submissions");
1844 if (Paraimpl->MGraph != this->impl->MGraph) {
1847 "Cannot use a Dynamic Parameter with a node associated with a graph "
1848 "other than the one it was created with.");
1850 impl->MDynamicParameters.emplace_back(Paraimpl.get(), ArgIndex);
1853 bool handler::eventNeeded()
const {
return impl->MEventNeeded; }
1855 void *handler::storeRawArg(
const void *Ptr,
size_t Size) {
1856 impl->CGData.MArgsStorage.emplace_back(Size);
1857 void *Storage =
static_cast<void *
>(impl->CGData.MArgsStorage.back().data());
1858 std::memcpy(Storage, Ptr, Size);
1862 void handler::SetHostTask(std::function<
void()> &&Func) {
1863 setNDRangeDescriptor(range<1>(1));
1864 impl->MHostTask.reset(
new detail::HostTask(std::move(Func)));
1868 void handler::SetHostTask(std::function<
void(interop_handle)> &&Func) {
1869 setNDRangeDescriptor(range<1>(1));
1870 impl->MHostTask.reset(
new detail::HostTask(std::move(Func)));
1876 impl->CGData.MRequirements.push_back(
Accessor.get());
1878 impl->CGData.MAccStorage.push_back(std::move(
Accessor));
1881 void handler::addLifetimeSharedPtrStorage(std::shared_ptr<const void> SPtr) {
1882 impl->CGData.MSharedPtrStorage.push_back(std::move(SPtr));
1886 int AccessTarget,
int ArgIndex) {
1887 impl->MArgs.emplace_back(ArgKind, Req, AccessTarget, ArgIndex);
1890 void handler::clearArgs() { impl->MArgs.clear(); }
1892 void handler::setArgsToAssociatedAccessors() {
1893 impl->MArgs = impl->MAssociatedAccesors;
1896 bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req,
1898 return std::find_if(
1899 impl->MAssociatedAccesors.cbegin(),
1900 impl->MAssociatedAccesors.cend(), [&](
const detail::ArgDesc &AD) {
1901 return AD.MType == detail::kernel_param_kind_t::kind_accessor &&
1903 AD.MSize == static_cast<int>(AccessTarget);
1904 }) == impl->MAssociatedAccesors.end();
1911 bool SetNumWorkGroups,
int Dims) {
1912 impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups, Dims};
1914 void handler::setNDRangeDescriptorPadded(
sycl::range<3> NumWorkItems,
1916 impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims};
1918 void handler::setNDRangeDescriptorPadded(
sycl::range<3> NumWorkItems,
1921 impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims};
static ProgramManager & getInstance()
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
bool kernelUsesAssert(const std::string &KernelName) const
static const char * get()
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer=nullptr, const std::vector< ur_exp_command_buffer_sync_point_t > &Dependencies={})
Registers a command group, and adds it to the dependency graph.
static Scheduler & getInstance()
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
const char * c_str() const noexcept
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Graph in the modifiable state.
std::unique_lock< std::shared_mutex > WriteLock
Command group handler class.
void depends_on(event Event)
Registers event dependencies on this command group.
void ext_oneapi_signal_external_semaphore(ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands submitted to the queue...
void ext_oneapi_wait_external_semaphore(ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Submit a non-blocking device-side wait on an external.
void ext_oneapi_copy(const void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
void use_kernel_bundle(const kernel_bundle< bundle_state::executable > &ExecBundle)
A unique identifier of an item in an index space.
context get_context() const noexcept
Defines the iteration domain of either a single work-group in a parallel dispatch,...
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
ur_image_channel_type_t convertChannelType(image_channel_type Type)
void * getValueFromDynamicParameter(ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase)
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
constexpr const char * SYCL_STREAM_NAME
ur_exp_image_copy_flags_t getUrImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType)
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
ur_image_channel_order_t convertChannelOrder(image_channel_order Order)
void enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, std::vector< ur_event_handle_t > &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function< void *(Requirement *Req)> &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
std::shared_ptr< event_impl > EventImplPtr
AccessorImplHost Requirement
CGType
Type of the command group.
kernel_id get_kernel_id_impl(string_view KernelName)
std::shared_ptr< AccessorImplHost > AccessorImplPtr
std::tuple< const RTDeviceBinaryImage *, ur_program_handle_t > retrieveKernelBinary(const QueueImplPtr &, const char *KernelName, CGExecKernel *CGKernel=nullptr)
node_type getNodeTypeFromCG(sycl::detail::CGType CGType)
image_channel_order get_image_default_channel_order(unsigned int num_channels)
@ executable
In executable state, the graph is ready to execute.
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, _Tp > arg(const complex< _Tp > &__c)
forward_progress_guarantee
external_semaphore_handle_type
constexpr size_t MaxNumAdditionalArgs
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
static bool checkContextSupports(const std::shared_ptr< detail::context_impl > &ContextImpl, ur_context_info_t InfoQuery)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
std::set< std::uintptr_t > MImageIdentifiers
A struct to describe the properties of an image.
unsigned int num_channels
image_channel_type channel_type
Opaque image memory handle type.
raw_handle_type raw_handle
Opaque interop semaphore handle type.
raw_handle_type raw_handle
external_semaphore_handle_type handle_type
C++ utilities for Unified Runtime integration.