26 #ifdef XPTI_ENABLE_INSTRUMENTATION
27 #include <xpti/xpti_data_types.h>
28 #include <xpti/xpti_trace_framework.hpp>
32 inline namespace _V1 {
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
// XPTI stream identifier used by the memory allocation/release trace helpers
// in this file: it is the first argument of every xptiCheckTraceEnabled() and
// xptiNotifySubscribers() call they make. Where it is assigned is not visible
// in this excerpt.
36 uint8_t GMemAllocStreamID;
// Trace event object handed to xptiNotifySubscribers() together with each
// mem_alloc_* / mem_release_* notification emitted in this file. Ownership and
// initialization happen outside this excerpt (TODO confirm where).
37 xpti::trace_event_data_t *GMemAllocEvent;
45 uint64_t CorrelationID = 0;
46 #ifdef XPTI_ENABLE_INSTRUMENTATION
47 constexpr uint16_t NotificationTraceType =
48 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_begin);
49 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
50 xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 , AllocSize,
53 CorrelationID = xptiGetUniqueId();
54 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
55 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
62 size_t AllocSize,
size_t GuardZone,
63 uint64_t CorrelationID) {
69 #ifdef XPTI_ENABLE_INSTRUMENTATION
70 constexpr uint16_t NotificationTraceType =
71 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_end);
72 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
73 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
75 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
76 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
84 uint64_t CorrelationID = 0;
85 #ifdef XPTI_ENABLE_INSTRUMENTATION
86 constexpr uint16_t NotificationTraceType =
87 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_begin);
88 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
89 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
92 CorrelationID = xptiGetUniqueId();
93 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
94 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
101 uint64_t CorrelationID) {
105 #ifdef XPTI_ENABLE_INSTRUMENTATION
106 constexpr uint16_t NotificationTraceType =
107 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_end);
108 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
109 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
112 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
113 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
121 if (!Events.empty()) {
122 const PluginPtr &Plugin = Events[0]->getPlugin();
123 std::vector<sycl::detail::pi::PiEvent> PiEvents(Events.size());
124 std::transform(Events.begin(), Events.end(), PiEvents.begin(),
126 return EventImpl->getHandleRef();
135 #ifdef XPTI_ENABLE_INSTRUMENTATION
140 #ifdef XPTI_ENABLE_INSTRUMENTATION
143 xpti::utils::finally _{[&] {
145 uintptr_t MemObjID = (uintptr_t)(*RetMem);
154 *RetMem,
nullptr, &Ptr);
169 #ifdef XPTI_ENABLE_INSTRUMENTATION
172 uintptr_t MemObjID = (uintptr_t)(Mem);
175 if (xptiTraceEnabled()) {
182 Ptr = (uintptr_t)(PtrHandle);
187 #ifdef XPTI_ENABLE_INSTRUMENTATION
189 xpti::utils::finally _{
201 #ifdef XPTI_ENABLE_INSTRUMENTATION
203 uintptr_t MemObjID = (uintptr_t)(Buffer);
207 #ifdef XPTI_ENABLE_INSTRUMENTATION
209 xpti::utils::finally _{[&] {
215 Offset, Size, NumEvents,
216 WaitList, Event, RetMap);
222 #ifdef XPTI_ENABLE_INSTRUMENTATION
224 uintptr_t MemObjID = (uintptr_t)(Mem);
225 uintptr_t Ptr = (uintptr_t)(MappedPtr);
229 #ifdef XPTI_ENABLE_INSTRUMENTATION
231 xpti::utils::finally _{[&] {
249 std::vector<EventImplPtr> DepEvents,
256 MemObj->
releaseMem(TargetContext, MemAllocation);
262 if (UserPtr == MemAllocation) {
267 if (TargetContext->is_host()) {
272 const PluginPtr &Plugin = TargetContext->getPlugin();
277 bool InitFromUserData,
void *HostPtr,
278 std::vector<EventImplPtr> DepEvents,
285 return MemObj->
allocateMem(TargetContext, InitFromUserData, HostPtr,
290 bool HostPtrReadOnly,
size_t Size,
292 std::ignore = HostPtrReadOnly;
308 (void)InteropContext;
310 assert(TargetContext == InteropContext &&
"Expected matching contexts");
311 OutEventToWait = InteropEvent->getHandleRef();
314 if (
nullptr != OutEventToWait) {
315 const PluginPtr &Plugin = InteropEvent->getPlugin();
332 ContextImplPtr TargetContext,
void *UserPtr,
bool HostPtrReadOnly,
340 const PluginPtr &Plugin = TargetContext->getPlugin();
342 CreationFlags, &Format, &Desc,
349 bool HostPtrReadOnly,
const size_t Size,
358 const PluginPtr &Plugin = TargetContext->getPlugin();
360 std::vector<pi_mem_properties> AllocProps;
363 TargetContext->isBufferLocationSupported()) {
366 .get_buffer_location();
367 AllocProps.reserve(AllocProps.size() + 2);
369 AllocProps.push_back(Location);
375 AllocProps.reserve(AllocProps.size() + 2);
377 AllocProps.push_back(Channel);
381 if (!AllocProps.empty()) {
384 AllocProps.push_back(0);
385 AllocPropsPtr = AllocProps.data();
389 Size, UserPtr, &NewMem, AllocPropsPtr);
395 bool HostPtrReadOnly,
size_t Size,
const EventImplPtr &InteropEvent,
399 if (TargetContext->is_host())
402 else if (UserPtr && InteropContext)
405 InteropContext, PropsList, OutEventToWait);
415 bool HostPtrReadOnly,
size_t Size,
421 if (TargetContext->is_host())
424 if (UserPtr && InteropContext)
426 InteropContext, PropsList, OutEventToWait);
432 void *ParentMemObj,
size_t ElemSize,
434 std::vector<EventImplPtr> DepEvents,
439 if (TargetContext->is_host())
440 return static_cast<void *
>(
static_cast<char *
>(ParentMemObj) + Offset);
442 size_t SizeInBytes = ElemSize;
443 for (
size_t I = 0; I < 3; ++I)
444 SizeInBytes *= Range[I];
449 const PluginPtr &Plugin = TargetContext->getPlugin();
453 if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
454 throw invalid_object_error(
455 "Specified offset of the sub-buffer being constructed is not a "
456 "multiple of the memory base address alignment",
457 PI_ERROR_INVALID_VALUE);
459 if (Error != PI_SUCCESS) {
460 Plugin->reportPiError(Error,
"allocateMemSubBuffer()");
480 if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
502 unsigned int DstElemSize,
503 std::vector<sycl::detail::pi::PiEvent> DepEvents,
506 (void)SrcAccessRange;
507 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
510 const PluginPtr &Plugin = TgtQueue->getPlugin();
517 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
518 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
519 size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.
XTerm] * DstElemSize;
520 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
521 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
523 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
524 if (1 == DimDst && 1 == DimSrc) {
525 if (OutEventImpl !=
nullptr)
526 OutEventImpl->setHostEnqueueTime();
529 PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
530 SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
532 size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
533 size_t BufferSlicePitch =
534 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
535 size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
536 size_t HostSlicePitch =
537 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
540 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
542 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
544 DstAccessRange[DstPos.
YTerm],
545 DstAccessRange[DstPos.
ZTerm]};
546 if (OutEventImpl !=
nullptr)
547 OutEventImpl->setHostEnqueueTime();
550 PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
551 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
552 SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
555 size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
556 size_t InputSlicePitch =
557 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
560 DstOffset[DstPos.YTerm],
561 DstOffset[DstPos.ZTerm]};
563 DstAccessRange[DstPos.YTerm],
564 DstAccessRange[DstPos.ZTerm]};
565 if (OutEventImpl !=
nullptr)
566 OutEventImpl->setHostEnqueueTime();
569 PI_FALSE, &Origin, &Region, InputRowPitch,
570 InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
580 unsigned int DstElemSize,
581 std::vector<sycl::detail::pi::PiEvent> DepEvents,
584 (void)DstAccessRange;
585 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
588 const PluginPtr &Plugin = SrcQueue->getPlugin();
601 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
602 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
603 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
604 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
605 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
607 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
608 if (1 == DimDst && 1 == DimSrc) {
609 if (OutEventImpl !=
nullptr)
610 OutEventImpl->setHostEnqueueTime();
613 PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
614 DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
616 size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
617 size_t BufferSlicePitch =
618 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
619 size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
620 size_t HostSlicePitch =
621 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
624 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
626 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
628 SrcAccessRange[SrcPos.
YTerm],
629 SrcAccessRange[SrcPos.
ZTerm]};
630 if (OutEventImpl !=
nullptr)
631 OutEventImpl->setHostEnqueueTime();
634 PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
635 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
636 DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
639 size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
641 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
644 SrcOffset[SrcPos.YTerm],
645 SrcOffset[SrcPos.ZTerm]};
647 SrcAccessRange[SrcPos.YTerm],
648 SrcAccessRange[SrcPos.ZTerm]};
649 if (OutEventImpl !=
nullptr)
650 OutEventImpl->setHostEnqueueTime();
652 Queue, SrcMem,
PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
653 DepEvents.size(), DepEvents.data(), &OutEvent);
663 std::vector<sycl::detail::pi::PiEvent> DepEvents,
666 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
669 const PluginPtr &Plugin = SrcQueue->getPlugin();
676 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
677 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
678 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
679 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
680 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
682 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
683 if (1 == DimDst && 1 == DimSrc) {
684 if (OutEventImpl !=
nullptr)
685 OutEventImpl->setHostEnqueueTime();
687 Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
688 SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
694 size_t SrcRowPitch = SrcSzWidthBytes;
695 size_t SrcSlicePitch = (DimSrc <= 1)
697 : SrcSzWidthBytes * SrcSize[SrcPos.
YTerm];
698 size_t DstRowPitch = DstSzWidthBytes;
699 size_t DstSlicePitch = (DimDst <= 1)
701 : DstSzWidthBytes * DstSize[DstPos.
YTerm];
704 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
706 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
708 SrcAccessRange[SrcPos.
YTerm],
709 SrcAccessRange[SrcPos.
ZTerm]};
710 if (OutEventImpl !=
nullptr)
711 OutEventImpl->setHostEnqueueTime();
713 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
714 SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
715 DepEvents.data(), &OutEvent);
719 SrcOffset[SrcPos.
YTerm],
720 SrcOffset[SrcPos.
ZTerm]};
722 DstOffset[DstPos.
YTerm],
723 DstOffset[DstPos.
ZTerm]};
725 SrcAccessRange[SrcPos.
YTerm],
726 SrcAccessRange[SrcPos.
ZTerm]};
727 if (OutEventImpl !=
nullptr)
728 OutEventImpl->setHostEnqueueTime();
730 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
731 DepEvents.size(), DepEvents.data(), &OutEvent);
741 unsigned int DstElemSize,
742 std::vector<sycl::detail::pi::PiEvent>,
744 if ((DimSrc != 1 || DimDst != 1) &&
745 (SrcOffset !=
id<3>{0, 0, 0} || DstOffset !=
id<3>{0, 0, 0} ||
746 SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
747 throw runtime_error(
"Not supported configuration of memcpy requested",
748 PI_ERROR_INVALID_OPERATION);
751 SrcMem += SrcOffset[0] * SrcElemSize;
752 DstMem += DstOffset[0] * DstElemSize;
754 if (SrcMem == DstMem)
758 SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
759 std::memcpy(DstMem, SrcMem, BytesToCopy);
771 unsigned int DstElemSize,
772 std::vector<sycl::detail::pi::PiEvent> DepEvents,
776 if (SrcQueue->is_host()) {
777 if (TgtQueue->is_host())
778 copyH2H(SYCLMemObj, (
char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
779 SrcAccessRange, SrcOffset, SrcElemSize, (
char *)DstMem,
780 std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
781 DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
783 copyH2D(SYCLMemObj, (
char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
784 SrcAccessRange, SrcOffset, SrcElemSize,
785 pi::cast<sycl::detail::pi::PiMem>(DstMem), std::move(TgtQueue),
786 DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
787 std::move(DepEvents), OutEvent, OutEventImpl);
789 if (TgtQueue->is_host())
790 copyD2H(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
791 std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
792 SrcElemSize, (
char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
793 DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
794 OutEvent, OutEventImpl);
796 copyD2D(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
797 std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
798 SrcElemSize, pi::cast<sycl::detail::pi::PiMem>(DstMem),
799 std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
800 DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
812 unsigned int DstElemSize,
813 std::vector<sycl::detail::pi::PiEvent> DepEvents,
815 MemoryManager::copy(SYCLMemObj, SrcMem, SrcQueue, DimSrc, SrcSize,
816 SrcAccessRange, SrcOffset, SrcElemSize, DstMem, TgtQueue,
817 DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
818 DepEvents, OutEvent,
nullptr);
822 size_t PatternSize,
const char *Pattern,
825 unsigned int ElementSize,
826 std::vector<sycl::detail::pi::PiEvent> DepEvents,
829 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
831 const PluginPtr &Plugin = Queue->getPlugin();
833 if (SYCLMemObj->
getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
834 if (OutEventImpl !=
nullptr)
835 OutEventImpl->setHostEnqueueTime();
839 bool RangesUsable = (Dim <= 1) || (MemRange == AccRange);
842 bool OffsetUsable = (Dim <= 1) || (Offset ==
sycl::id<3>{0, 0, 0});
843 size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2];
845 if (RangesUsable && OffsetUsable) {
847 Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem),
848 Pattern, PatternSize, Offset[0] * ElementSize,
849 RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(),
855 throw runtime_error(
"Not supported configuration of fill requested",
856 PI_ERROR_INVALID_OPERATION);
858 if (OutEventImpl !=
nullptr)
859 OutEventImpl->setHostEnqueueTime();
863 Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
864 &Offset[0], &AccRange[0], DepEvents.
size(), DepEvents.data(),
871 size_t PatternSize,
const char *Pattern,
874 unsigned int ElementSize,
875 std::vector<sycl::detail::pi::PiEvent> DepEvents,
878 Range, Offset, ElementSize, DepEvents, OutEvent,
nullptr);
884 unsigned int ElementSize,
885 std::vector<sycl::detail::pi::PiEvent> DepEvents,
887 if (Queue->is_host()) {
888 throw runtime_error(
"Not supported configuration of map requested",
889 PI_ERROR_INVALID_OPERATION);
895 case access::mode::read:
902 case access::mode::atomic:
905 case access::mode::discard_write:
906 case access::mode::discard_read_write:
911 AccessOffset[0] *= ElementSize;
912 AccessRange[0] *= ElementSize;
915 assert(AccessOffset[0] == 0 &&
"Handle offset");
917 void *MappedPtr =
nullptr;
918 const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
919 const PluginPtr &Plugin = Queue->getPlugin();
921 pi::cast<sycl::detail::pi::PiMem>(Mem),
PI_FALSE, Flags,
922 AccessOffset[0], BytesToMap, DepEvents.size(),
923 DepEvents.data(), &OutEvent, &MappedPtr);
929 std::vector<sycl::detail::pi::PiEvent> DepEvents,
936 const PluginPtr &Plugin = Queue->getPlugin();
938 pi::cast<sycl::detail::pi::PiMem>(Mem), MappedPtr,
939 DepEvents.size(), DepEvents.data(), &OutEvent);
// Copies Len bytes of USM memory from SrcMem to DstMem using SrcQueue. This is
// the overload that also records the host enqueue time on OutEventImpl.
//
// Behaviour visible in this excerpt:
//  * asserts that SrcQueue's context is not a host context;
//  * on an early path that only forwards the dependency events (its outer
//    guard is not visible here -- presumably the zero-length case, TODO
//    confirm), stamps the host enqueue time and passes the queue handle plus
//    DepEvents to a plugin call;
//  * throws runtime_error with PI_ERROR_INVALID_VALUE when SrcMem or DstMem is
//    null;
//  * otherwise stamps the host enqueue time (when OutEventImpl is non-null) and
//    issues a non-blocking (PI_FALSE) call with DstMem, SrcMem, Len, the
//    dependency list and OutEvent.
// DepEvents is taken by value, so every call copies or moves the vector.
942 void MemoryManager::copy_usm(
const void *SrcMem,
QueueImplPtr SrcQueue,
943 size_t Len,
void *DstMem,
944 std::vector<sycl::detail::pi::PiEvent> DepEvents,
// NOTE(review): the assert text below says "fill_usm" although this function
// is copy_usm -- looks like a copy/paste slip; the message should probably
// name copy_usm. Left untouched here because it is a runtime string.
947 assert(!SrcQueue->getContextImplPtr()->is_host() &&
948 "Host queue not supported in fill_usm.");
// Only touch the plugin on this path when there is something to wait on.
951 if (!DepEvents.empty()) {
952 if (OutEventImpl !=
nullptr)
953 OutEventImpl->setHostEnqueueTime();
955 SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
// Reject null endpoints before handing the pointers to the backend.
961 if (!SrcMem || !DstMem)
962 throw runtime_error(
"NULL pointer argument in memory copy operation.",
963 PI_ERROR_INVALID_VALUE);
965 const PluginPtr &Plugin = SrcQueue->getPlugin();
// Record the host-side submission time just before the enqueue call.
966 if (OutEventImpl !=
nullptr)
967 OutEventImpl->setHostEnqueueTime();
969 SrcQueue->getHandleRef(),
970 PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(),
971 DepEvents.data(), OutEvent);
975 void MemoryManager::copy_usm(
const void *SrcMem,
QueueImplPtr SrcQueue,
976 size_t Len,
void *DstMem,
977 std::vector<sycl::detail::pi::PiEvent> DepEvents,
979 MemoryManager::copy_usm(SrcMem, SrcQueue, Len, DstMem, DepEvents, OutEvent,
983 void MemoryManager::fill_usm(
void *Mem,
QueueImplPtr Queue,
size_t Length,
985 std::vector<sycl::detail::pi::PiEvent> DepEvents,
988 assert(!Queue->getContextImplPtr()->is_host() &&
989 "Host queue not supported in fill_usm.");
992 if (!DepEvents.empty()) {
993 if (OutEventImpl !=
nullptr)
994 OutEventImpl->setHostEnqueueTime();
996 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1002 throw runtime_error(
"NULL pointer argument in memory fill operation.",
1003 PI_ERROR_INVALID_VALUE);
1004 if (OutEventImpl !=
nullptr)
1005 OutEventImpl->setHostEnqueueTime();
1006 const PluginPtr &Plugin = Queue->getPlugin();
1008 Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
1009 DepEvents.data(), OutEvent);
1015 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1017 MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent,
1021 void MemoryManager::prefetch_usm(
1023 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1026 assert(!Queue->getContextImplPtr()->is_host() &&
1027 "Host queue not supported in prefetch_usm.");
1029 const PluginPtr &Plugin = Queue->getPlugin();
1030 if (OutEventImpl !=
nullptr)
1031 OutEventImpl->setHostEnqueueTime();
1034 DepEvents.size(), DepEvents.data(), OutEvent);
1038 void MemoryManager::prefetch_usm(
1040 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1042 MemoryManager::prefetch_usm(Mem, Queue, Length, DepEvents, OutEvent,
nullptr);
1045 void MemoryManager::advise_usm(
1047 std::vector<sycl::detail::pi::PiEvent> ,
1050 assert(!Queue->getContextImplPtr()->is_host() &&
1051 "Host queue not supported in advise_usm.");
1053 const PluginPtr &Plugin = Queue->getPlugin();
1054 if (OutEventImpl !=
nullptr)
1055 OutEventImpl->setHostEnqueueTime();
1057 Length, Advice, OutEvent);
1063 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1065 MemoryManager::advise_usm(Mem, Queue, Length, Advice, DepEvents, OutEvent,
1069 void MemoryManager::copy_2d_usm(
1070 const void *SrcMem,
size_t SrcPitch,
QueueImplPtr Queue,
void *DstMem,
1071 size_t DstPitch,
size_t Width,
size_t Height,
1072 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1075 assert(!Queue->getContextImplPtr()->is_host() &&
1076 "Host queue not supported in copy_2d_usm.");
1078 if (Width == 0 || Height == 0) {
1080 if (!DepEvents.empty()) {
1081 if (OutEventImpl !=
nullptr)
1082 OutEventImpl->setHostEnqueueTime();
1084 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1089 if (!DstMem || !SrcMem)
1091 "NULL pointer argument in 2D memory copy operation.");
1093 const PluginPtr &Plugin = Queue->getPlugin();
1095 pi_bool SupportsUSMMemcpy2D =
false;
1097 Queue->getContextImplPtr()->getHandleRef(),
1099 &SupportsUSMMemcpy2D,
nullptr);
1101 if (SupportsUSMMemcpy2D) {
1102 if (OutEventImpl !=
nullptr)
1103 OutEventImpl->setHostEnqueueTime();
1106 Queue->getHandleRef(),
PI_FALSE, DstMem, DstPitch, SrcMem,
1107 SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1113 context Ctx = createSyclObjFromImpl<context>(Queue->getContextImplPtr());
1117 SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
1119 DstAllocType == usm::alloc::unknown || DstAllocType == usm::alloc::host;
1120 assert((SrcIsHost || DstIsHost) &&
"In fallback path for copy_2d_usm either "
1121 "source or destination must be on host.");
1125 std::vector<OwnedPiEvent> CopyEventsManaged;
1126 CopyEventsManaged.reserve(Height);
1128 std::vector<sycl::detail::pi::PiEvent> CopyEvents(Height);
1129 if (OutEventImpl !=
nullptr)
1130 OutEventImpl->setHostEnqueueTime();
1131 for (
size_t I = 0; I < Height; ++I) {
1132 char *DstItBegin =
static_cast<char *
>(DstMem) + I * DstPitch;
1133 const char *SrcItBegin =
static_cast<const char *
>(SrcMem) + I * SrcPitch;
1135 Queue->getHandleRef(),
PI_FALSE, DstItBegin, SrcItBegin,
1136 Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I);
1137 CopyEventsManaged.emplace_back(CopyEvents[I], Plugin,
1140 if (OutEventImpl !=
nullptr)
1141 OutEventImpl->setHostEnqueueTime();
1144 Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent);
1148 void MemoryManager::copy_2d_usm(
1149 const void *SrcMem,
size_t SrcPitch,
QueueImplPtr Queue,
void *DstMem,
1150 size_t DstPitch,
size_t Width,
size_t Height,
1151 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1153 MemoryManager::copy_2d_usm(SrcMem, SrcPitch, Queue, DstMem, DstPitch, Width,
1154 Height, DepEvents, OutEvent,
nullptr);
1157 void MemoryManager::fill_2d_usm(
1158 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1159 const std::vector<char> &Pattern,
1160 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1163 assert(!Queue->getContextImplPtr()->is_host() &&
1164 "Host queue not supported in fill_2d_usm.");
1166 if (Width == 0 || Height == 0) {
1168 if (!DepEvents.empty()) {
1169 if (OutEventImpl !=
nullptr)
1170 OutEventImpl->setHostEnqueueTime();
1172 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1179 "NULL pointer argument in 2D memory fill operation.");
1180 if (OutEventImpl !=
nullptr)
1181 OutEventImpl->setHostEnqueueTime();
1182 const PluginPtr &Plugin = Queue->getPlugin();
1184 Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(),
1185 Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1189 void MemoryManager::fill_2d_usm(
1190 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1191 const std::vector<char> &Pattern,
1192 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1194 MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, Pattern,
1195 DepEvents, OutEvent,
nullptr);
// Sets a Width x Height region of USM memory at DstMem (row stride Pitch) to
// the byte Value, enqueued on Queue. This is the overload that also records
// the host enqueue time on OutEventImpl.
//
// Behaviour visible in this excerpt:
//  * asserts that Queue's context is not a host context;
//  * when Width or Height is zero, only forwards DepEvents (if any) to a
//    plugin call after stamping the host enqueue time;
//  * reports "NULL pointer argument in 2D memory memset operation." on an
//    error path whose condition is not visible here (presumably a null
//    DstMem -- TODO confirm);
//  * otherwise stamps the host enqueue time and issues the plugin call with
//    Value widened to int.
// DepEvents is taken by value, so every call copies or moves the vector.
1198 void MemoryManager::memset_2d_usm(
1199 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1200 char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
// NOTE(review): the assert text below says "fill_2d_usm" although this
// function is memset_2d_usm -- looks like a copy/paste slip. Left untouched
// here because it is a runtime string.
1203 assert(!Queue->getContextImplPtr()->is_host() &&
1204 "Host queue not supported in fill_2d_usm.");
// Degenerate region: nothing to write, but dependencies are still forwarded.
1206 if (Width == 0 || Height == 0) {
1208 if (!DepEvents.empty()) {
1209 if (OutEventImpl !=
nullptr)
1210 OutEventImpl->setHostEnqueueTime();
1212 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1220 "NULL pointer argument in 2D memory memset operation.");
// Record the host-side submission time just before the enqueue call.
1221 if (OutEventImpl !=
nullptr)
1222 OutEventImpl->setHostEnqueueTime();
1223 const PluginPtr &Plugin = Queue->getPlugin();
1225 Queue->getHandleRef(), DstMem, Pitch,
static_cast<int>(Value), Width,
1226 Height, DepEvents.size(), DepEvents.data(), OutEvent);
1230 void MemoryManager::memset_2d_usm(
1231 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1232 char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1234 MemoryManager::memset_2d_usm(DstMem, Queue, Pitch, Width, Height, Value,
1235 DepEvents, OutEvent,
nullptr);
1241 size_t NumBytes,
size_t Offset,
const void *Src,
1242 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1248 void *Dest = DeviceGlobalUSM.
getPtr();
1256 std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1257 const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1258 ZIEvent ? AuxDepEventsStorage : DepEvents;
1263 AuxDepEventsStorage = DepEvents;
1264 AuxDepEventsStorage.push_back(ZIEvent.
GetEvent());
1267 MemoryManager::copy_usm(Src, Queue, NumBytes,
1268 reinterpret_cast<char *
>(Dest) + Offset,
1269 ActualDepEvents, OutEvent, OutEventImpl);
1274 size_t NumBytes,
size_t Offset,
void *Dest,
1275 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1282 void *Src = DeviceGlobalUSM.
getPtr();
1290 std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1291 const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1292 ZIEvent ? AuxDepEventsStorage : DepEvents;
1297 AuxDepEventsStorage = DepEvents;
1298 AuxDepEventsStorage.push_back(ZIEvent.
GetEvent());
1301 MemoryManager::copy_usm(
reinterpret_cast<const char *
>(Src) + Offset, Queue,
1302 NumBytes, Dest, ActualDepEvents, OutEvent,
1310 "device_global is not device image scope decorated.");
1315 "More than one image exists with the device_global.");
1320 "No image exists with the device_global.");
1323 device Device = Queue->get_device();
1325 std::optional<sycl::detail::pi::PiProgram> CachedProgram =
1326 ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry);
1328 return *CachedProgram;
1331 auto Context = createSyclObjFromImpl<context>(ContextImpl);
1343 size_t NumBytes,
size_t Offset,
const void *Src,
1344 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1348 const PluginPtr &Plugin = Queue->getPlugin();
1350 Queue->getHandleRef(), Program, DeviceGlobalEntry->
MUniqueId.c_str(),
1351 false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(),
1357 size_t NumBytes,
size_t Offset,
void *Dest,
1358 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1362 const PluginPtr &Plugin = Queue->getPlugin();
1364 Queue->getHandleRef(), Program, DeviceGlobalEntry->
MUniqueId.c_str(),
1365 false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(),
1369 void MemoryManager::copy_to_device_global(
1370 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1371 size_t NumBytes,
size_t Offset,
const void *SrcMem,
1372 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1376 detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1380 "Invalid copy operation for device_global.");
1382 "Copy to device_global is out of bounds.");
1384 if (IsDeviceImageScoped)
1386 DepEvents, OutEvent);
1389 OutEvent, OutEventImpl);
1393 void MemoryManager::copy_to_device_global(
1394 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1395 size_t NumBytes,
size_t Offset,
const void *SrcMem,
1396 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1398 copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1399 Offset, SrcMem, DepEvents, OutEvent,
nullptr);
1402 void MemoryManager::copy_from_device_global(
1403 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1404 size_t NumBytes,
size_t Offset,
void *DstMem,
1405 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1409 detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1413 "Invalid copy operation for device_global.");
1415 "Copy from device_global is out of bounds.");
1417 if (IsDeviceImageScoped)
1419 DepEvents, OutEvent);
1422 DepEvents, OutEvent, OutEventImpl);
1426 void MemoryManager::copy_from_device_global(
1427 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1428 size_t NumBytes,
size_t Offset,
void *DstMem,
1429 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1431 copy_from_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1432 Offset, DstMem, DepEvents, OutEvent,
nullptr);
1436 void MemoryManager::ext_oneapi_copyD2D_cmd_buffer(
1441 unsigned int SrcElemSize,
void *DstMem,
unsigned int DimDst,
1444 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1446 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1447 (void)DstAccessRange;
1449 const PluginPtr &Plugin = Context->getPlugin();
1456 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1457 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1458 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
1459 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1460 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1462 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1464 "Images are not supported in Graphs");
1467 if (1 == DimDst && 1 == DimSrc) {
1469 CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1470 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), SrcXOffBytes,
1471 DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(),
1477 size_t SrcRowPitch = SrcSzWidthBytes;
1478 size_t SrcSlicePitch = (DimSrc <= 1)
1480 : SrcSzWidthBytes * SrcSize[SrcPos.
YTerm];
1481 size_t DstRowPitch = DstSzWidthBytes;
1482 size_t DstSlicePitch = (DimDst <= 1)
1484 : DstSzWidthBytes * DstSize[DstPos.
YTerm];
1487 SrcOffset[SrcPos.
ZTerm]};
1489 DstOffset[DstPos.
ZTerm]};
1491 SrcAccessRange[SrcPos.
YTerm],
1492 SrcAccessRange[SrcPos.
ZTerm]};
1495 CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1496 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), &SrcOrigin,
1497 &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch,
1498 DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint);
1502 void MemoryManager::ext_oneapi_copyD2H_cmd_buffer(
1507 unsigned int SrcElemSize,
char *DstMem,
unsigned int DimDst,
1509 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1511 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1513 const PluginPtr &Plugin = Context->getPlugin();
1520 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1521 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1522 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
1523 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1524 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1526 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1528 "Images are not supported in Graphs");
1531 if (1 == DimDst && 1 == DimSrc) {
1535 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1536 SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes,
1537 Deps.size(), Deps.data(), OutSyncPoint);
1539 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1542 "Device-to-host buffer copy command not supported by graph backend");
1544 Plugin->checkPiResult(Result);
1547 size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1548 size_t BufferSlicePitch =
1549 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
1550 size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1551 size_t HostSlicePitch =
1552 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
1555 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
1557 DstOffset[DstPos.
ZTerm]};
1559 SrcAccessRange[SrcPos.
YTerm],
1560 SrcAccessRange[SrcPos.
ZTerm]};
1565 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1566 &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1567 BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(),
1568 Deps.data(), OutSyncPoint);
1569 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1572 "Device-to-host buffer copy command not supported by graph backend");
1574 Plugin->checkPiResult(Result);
1579 void MemoryManager::ext_oneapi_copyH2D_cmd_buffer(
1583 sycl::id<3> SrcOffset,
unsigned int SrcElemSize,
void *DstMem,
1586 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1588 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1590 const PluginPtr &Plugin = Context->getPlugin();
1597 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1598 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1599 size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.
XTerm] * DstElemSize;
1600 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1601 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1603 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1605 "Images are not supported in Graphs");
1608 if (1 == DimDst && 1 == DimSrc) {
1612 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1613 DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes,
1614 Deps.size(), Deps.data(), OutSyncPoint);
1616 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1619 "Host-to-device buffer copy command not supported by graph backend");
1621 Plugin->checkPiResult(Result);
1624 size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1625 size_t BufferSlicePitch =
1626 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
1627 size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1628 size_t HostSlicePitch =
1629 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
1632 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
1634 SrcOffset[SrcPos.
ZTerm]};
1636 DstAccessRange[DstPos.
YTerm],
1637 DstAccessRange[DstPos.
ZTerm]};
1642 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1643 &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1644 BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(),
1645 Deps.data(), OutSyncPoint);
1647 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1650 "Host-to-device buffer copy command not supported by graph backend");
1652 Plugin->checkPiResult(Result);
1657 void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
1660 void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1662 if (!SrcMem || !DstMem)
1663 throw runtime_error(
"NULL pointer argument in memory copy operation.",
1664 PI_ERROR_INVALID_VALUE);
1666 const PluginPtr &Plugin = Context->getPlugin();
1669 CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(),
1671 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1674 "USM copy command not supported by graph backend");
1676 Plugin->checkPiResult(Result);
1680 void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
1683 size_t Len,
int Pattern, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1687 throw runtime_error(
"NULL pointer argument in memory fill operation.",
1688 PI_ERROR_INVALID_VALUE);
1690 const PluginPtr &Plugin = Context->getPlugin();
1692 size_t PatternSize = 1;
1694 CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(),
1695 Deps.data(), OutSyncPoint);
1698 void MemoryManager::ext_oneapi_fill_cmd_buffer(
1701 void *Mem,
size_t PatternSize,
const char *Pattern,
unsigned int Dim,
1703 unsigned int ElementSize,
1704 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1706 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1708 const PluginPtr &Plugin = Context->getPlugin();
1709 if (SYCLMemObj->
getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
1711 "Images are not supported in Graphs");
1716 bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
1719 bool OffsetUsable = (Dim <= 1) || (AccessOffset ==
sycl::id<3>{0, 0, 0});
1720 size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];
1722 if (RangesUsable && OffsetUsable) {
1724 CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
1725 PatternSize, AccessOffset[0] * ElementSize,
1726 RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
1731 throw runtime_error(
"Not supported configuration of fill requested",
1732 PI_ERROR_INVALID_OPERATION);
1735 void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer(
1738 size_t Length, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1740 assert(!Context->is_host() &&
"Host queue not supported in prefetch_usm.");
1742 const PluginPtr &Plugin = Context->getPlugin();
1745 Deps.data(), OutSyncPoint);
1748 void MemoryManager::ext_oneapi_advise_usm_cmd_buffer(
1752 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1754 assert(!Context->is_host() &&
"Host queue not supported in advise_usm.");
1756 const PluginPtr &Plugin = Context->getPlugin();
1758 CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(),
1762 void MemoryManager::copy_image_bindless(
1771 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1774 assert(!Queue->getContextImplPtr()->is_host() &&
1775 "Host queue not supported in copy_image_bindless.");
1780 "Invalid flags passed to copy_image_bindless.");
1784 "NULL pointer argument in bindless image copy operation.");
1788 Queue->getHandleRef(), Dst, Src, &Format, &Desc, Flags, &SrcOffset,
1789 &DstOffset, &CopyExtent, &HostExtent, DepEvents.size(), DepEvents.data(),
The context class represents a SYCL context on which kernel functions may be executed.
static void * allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range< 3 > Range, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const sycl::property_list &PropsList)
static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList)
static void * allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList)
static void * allocateMemBuffer(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &InteropEvent)=0
static void bufferAssociateNotification(const void *, const void *)
static void bufferReleaseNotification(const void *, const void *)
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
A unique identifier of an item in an index space.
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
PropT get_property() const
::pi_mem_flags PiMemFlags
::pi_ext_sync_point PiExtSyncPoint
static void memcpyToDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
static void memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent >, sycl::detail::pi::PiEvent &, const detail::EventImplPtr &)
void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static sycl::detail::pi::PiProgram getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
static sycl::detail::pi::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
constexpr mode_tag_t< access_mode::read_write > read_write
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Sets the first count elements of type T, starting at dev_ptr, to pattern.
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
uintptr_t pi_native_handle
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextMemImageCopy(pi_queue command_queue, void *dst_ptr, void *src_ptr, const pi_image_format *image_format, const pi_image_desc *image_desc, const pi_image_copy_flags flags, pi_image_offset src_offset, pi_image_offset dst_offset, pi_image_region copy_extent, pi_image_region host_extent, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to copy image data from host to device or from device to host.
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_bitfield pi_mem_properties
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable: API to write data from the host to a device global variable.
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
pi_result piextCommandBufferMemBufferFill(pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer fill command to the command-buffer.
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_BUFFER_CREATE_TYPE_REGION
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D fill API.
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
pi_result piMemRelease(pi_mem mem)
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
constexpr pi_map_flags PI_MAP_WRITE
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
constexpr pi_map_flags PI_MAP_READ
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
pi_result piextCommandBufferFillUSM(pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM fill command to the command-buffer.
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
pi_result piextCommandBufferAdviseUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t length, pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Advise command to the command-buffer.
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to read data from a device global variable to the host.
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
pi_result piextCommandBufferPrefetchUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Prefetch command to the command-buffer.
pi_result piEventRetain(pi_event event)
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL
bool MIsDeviceImageScopeDecorated
std::set< std::uintptr_t > MImageIdentifiers
DeviceGlobalUSMMem & getOrAllocateDeviceGlobalUSM(const std::shared_ptr< queue_impl > &QueueImpl)
std::uint32_t MDeviceGlobalTSize
std::unordered_set< RTDeviceBinaryImage * > MImages
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
void *const & getPtr() const noexcept
sycl::detail::pi::PiEvent GetEvent()