28 #ifdef XPTI_ENABLE_INSTRUMENTATION
29 #include <xpti/xpti_data_types.h>
30 #include <xpti/xpti_trace_framework.hpp>
34 inline namespace _V1 {
37 #ifdef XPTI_ENABLE_INSTRUMENTATION
38 uint8_t GMemAllocStreamID;
39 xpti::trace_event_data_t *GMemAllocEvent;
47 uint64_t CorrelationID = 0;
48 #ifdef XPTI_ENABLE_INSTRUMENTATION
49 constexpr uint16_t NotificationTraceType =
50 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_begin);
51 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
52 xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 , AllocSize,
55 CorrelationID = xptiGetUniqueId();
56 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
57 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
64 size_t AllocSize,
size_t GuardZone,
65 uint64_t CorrelationID) {
71 #ifdef XPTI_ENABLE_INSTRUMENTATION
72 constexpr uint16_t NotificationTraceType =
73 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_end);
74 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
75 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
77 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
78 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
86 uint64_t CorrelationID = 0;
87 #ifdef XPTI_ENABLE_INSTRUMENTATION
88 constexpr uint16_t NotificationTraceType =
89 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_begin);
90 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
91 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
94 CorrelationID = xptiGetUniqueId();
95 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
96 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
103 uint64_t CorrelationID) {
107 #ifdef XPTI_ENABLE_INSTRUMENTATION
108 constexpr uint16_t NotificationTraceType =
109 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_end);
110 if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
111 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
114 xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
115 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
123 if (!Events.empty()) {
124 const PluginPtr &Plugin = Events[0]->getPlugin();
125 std::vector<sycl::detail::pi::PiEvent> PiEvents(Events.size());
126 std::transform(Events.begin(), Events.end(), PiEvents.begin(),
128 return EventImpl->getHandleRef();
137 #ifdef XPTI_ENABLE_INSTRUMENTATION
142 #ifdef XPTI_ENABLE_INSTRUMENTATION
145 xpti::utils::finally _{[&] {
147 uintptr_t MemObjID = (uintptr_t)(*RetMem);
156 *RetMem,
nullptr, &Ptr);
171 #ifdef XPTI_ENABLE_INSTRUMENTATION
174 uintptr_t MemObjID = (uintptr_t)(Mem);
177 if (xptiTraceEnabled()) {
184 Ptr = (uintptr_t)(PtrHandle);
189 #ifdef XPTI_ENABLE_INSTRUMENTATION
191 xpti::utils::finally _{
203 #ifdef XPTI_ENABLE_INSTRUMENTATION
205 uintptr_t MemObjID = (uintptr_t)(Buffer);
209 #ifdef XPTI_ENABLE_INSTRUMENTATION
211 xpti::utils::finally _{[&] {
217 Offset, Size, NumEvents,
218 WaitList, Event, RetMap);
224 #ifdef XPTI_ENABLE_INSTRUMENTATION
226 uintptr_t MemObjID = (uintptr_t)(Mem);
227 uintptr_t Ptr = (uintptr_t)(MappedPtr);
231 #ifdef XPTI_ENABLE_INSTRUMENTATION
233 xpti::utils::finally _{[&] {
251 std::vector<EventImplPtr> DepEvents,
258 MemObj->
releaseMem(TargetContext, MemAllocation);
264 if (UserPtr == MemAllocation) {
269 if (!TargetContext) {
274 const PluginPtr &Plugin = TargetContext->getPlugin();
279 bool InitFromUserData,
void *HostPtr,
280 std::vector<EventImplPtr> DepEvents,
287 return MemObj->
allocateMem(TargetContext, InitFromUserData, HostPtr,
292 bool HostPtrReadOnly,
size_t Size,
294 std::ignore = HostPtrReadOnly;
309 (void)InteropContext;
311 assert(TargetContext == InteropContext &&
"Expected matching contexts");
312 OutEventToWait = InteropEvent->getHandleRef();
315 if (
nullptr != OutEventToWait) {
316 const PluginPtr &Plugin = InteropEvent->getPlugin();
333 ContextImplPtr TargetContext,
void *UserPtr,
bool HostPtrReadOnly,
341 const PluginPtr &Plugin = TargetContext->getPlugin();
343 CreationFlags, &Format, &Desc,
350 bool HostPtrReadOnly,
const size_t Size,
359 const PluginPtr &Plugin = TargetContext->getPlugin();
361 std::vector<pi_mem_properties> AllocProps;
364 TargetContext->isBufferLocationSupported()) {
367 .get_buffer_location();
368 AllocProps.reserve(AllocProps.size() + 2);
370 AllocProps.push_back(Location);
376 AllocProps.reserve(AllocProps.size() + 2);
378 AllocProps.push_back(Channel);
382 if (!AllocProps.empty()) {
385 AllocProps.push_back(0);
386 AllocPropsPtr = AllocProps.data();
390 Size, UserPtr, &NewMem, AllocPropsPtr);
396 bool HostPtrReadOnly,
size_t Size,
const EventImplPtr &InteropEvent,
403 else if (UserPtr && InteropContext)
406 InteropContext, PropsList, OutEventToWait);
416 bool HostPtrReadOnly,
size_t Size,
425 if (UserPtr && InteropContext)
427 InteropContext, PropsList, OutEventToWait);
433 void *ParentMemObj,
size_t ElemSize,
435 std::vector<EventImplPtr> DepEvents,
441 return static_cast<void *
>(
static_cast<char *
>(ParentMemObj) + Offset);
443 size_t SizeInBytes = ElemSize;
444 for (
size_t I = 0; I < 3; ++I)
445 SizeInBytes *= Range[I];
450 const PluginPtr &Plugin = TargetContext->getPlugin();
454 if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
457 "Specified offset of the sub-buffer being constructed is not "
458 "a multiple of the memory base address alignment"),
461 if (Error != PI_SUCCESS) {
463 "allocateMemSubBuffer() failed"),
484 if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
506 unsigned int DstElemSize,
507 std::vector<sycl::detail::pi::PiEvent> DepEvents,
510 (void)SrcAccessRange;
511 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
512 assert(TgtQueue &&
"Destination mem object queue must be not nullptr");
515 const PluginPtr &Plugin = TgtQueue->getPlugin();
522 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
523 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
524 size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.
XTerm] * DstElemSize;
525 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
526 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
528 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
529 if (1 == DimDst && 1 == DimSrc) {
530 if (OutEventImpl !=
nullptr)
531 OutEventImpl->setHostEnqueueTime();
534 PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
535 SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
537 size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
538 size_t BufferSlicePitch =
539 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
540 size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
541 size_t HostSlicePitch =
542 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
545 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
547 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
549 DstAccessRange[DstPos.
YTerm],
550 DstAccessRange[DstPos.
ZTerm]};
551 if (OutEventImpl !=
nullptr)
552 OutEventImpl->setHostEnqueueTime();
555 PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
556 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
557 SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
560 size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
561 size_t InputSlicePitch =
562 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
565 DstOffset[DstPos.YTerm],
566 DstOffset[DstPos.ZTerm]};
568 DstAccessRange[DstPos.YTerm],
569 DstAccessRange[DstPos.ZTerm]};
570 if (OutEventImpl !=
nullptr)
571 OutEventImpl->setHostEnqueueTime();
574 PI_FALSE, &Origin, &Region, InputRowPitch,
575 InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
585 unsigned int DstElemSize,
586 std::vector<sycl::detail::pi::PiEvent> DepEvents,
589 (void)DstAccessRange;
590 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
591 assert(SrcQueue &&
"Source mem object queue is expected to be not nullptr");
594 const PluginPtr &Plugin = SrcQueue->getPlugin();
607 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
608 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
609 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
610 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
611 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
613 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
614 if (1 == DimDst && 1 == DimSrc) {
615 if (OutEventImpl !=
nullptr)
616 OutEventImpl->setHostEnqueueTime();
619 PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
620 DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
622 size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
623 size_t BufferSlicePitch =
624 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
625 size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
626 size_t HostSlicePitch =
627 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
630 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
632 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
634 SrcAccessRange[SrcPos.
YTerm],
635 SrcAccessRange[SrcPos.
ZTerm]};
636 if (OutEventImpl !=
nullptr)
637 OutEventImpl->setHostEnqueueTime();
640 PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
641 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
642 DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
645 size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
647 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
650 SrcOffset[SrcPos.YTerm],
651 SrcOffset[SrcPos.ZTerm]};
653 SrcAccessRange[SrcPos.YTerm],
654 SrcAccessRange[SrcPos.ZTerm]};
655 if (OutEventImpl !=
nullptr)
656 OutEventImpl->setHostEnqueueTime();
658 Queue, SrcMem,
PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
659 DepEvents.size(), DepEvents.data(), &OutEvent);
669 std::vector<sycl::detail::pi::PiEvent> DepEvents,
672 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
673 assert(SrcQueue &&
"Source mem object and target mem object queues are "
674 "expected to be not nullptr");
677 const PluginPtr &Plugin = SrcQueue->getPlugin();
684 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
685 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
686 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
687 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
688 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
690 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
691 if (1 == DimDst && 1 == DimSrc) {
692 if (OutEventImpl !=
nullptr)
693 OutEventImpl->setHostEnqueueTime();
695 Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
696 SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
702 size_t SrcRowPitch = SrcSzWidthBytes;
703 size_t SrcSlicePitch = (DimSrc <= 1)
705 : SrcSzWidthBytes * SrcSize[SrcPos.
YTerm];
706 size_t DstRowPitch = DstSzWidthBytes;
707 size_t DstSlicePitch = (DimDst <= 1)
709 : DstSzWidthBytes * DstSize[DstPos.
YTerm];
712 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
714 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
716 SrcAccessRange[SrcPos.
YTerm],
717 SrcAccessRange[SrcPos.
ZTerm]};
718 if (OutEventImpl !=
nullptr)
719 OutEventImpl->setHostEnqueueTime();
721 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
722 SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
723 DepEvents.data(), &OutEvent);
727 SrcOffset[SrcPos.
YTerm],
728 SrcOffset[SrcPos.
ZTerm]};
730 DstOffset[DstPos.
YTerm],
731 DstOffset[DstPos.
ZTerm]};
733 SrcAccessRange[SrcPos.
YTerm],
734 SrcAccessRange[SrcPos.
ZTerm]};
735 if (OutEventImpl !=
nullptr)
736 OutEventImpl->setHostEnqueueTime();
738 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
739 DepEvents.size(), DepEvents.data(), &OutEvent);
749 unsigned int DstElemSize,
750 std::vector<sycl::detail::pi::PiEvent>,
752 if ((DimSrc != 1 || DimDst != 1) &&
753 (SrcOffset !=
id<3>{0, 0, 0} || DstOffset !=
id<3>{0, 0, 0} ||
754 SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
756 "Not supported configuration of memcpy requested");
759 SrcMem += SrcOffset[0] * SrcElemSize;
760 DstMem += DstOffset[0] * DstElemSize;
762 if (SrcMem == DstMem)
766 SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
767 std::memcpy(DstMem, SrcMem, BytesToCopy);
779 unsigned int DstElemSize,
780 std::vector<sycl::detail::pi::PiEvent> DepEvents,
786 copyH2H(SYCLMemObj, (
char *)SrcMem,
nullptr, DimSrc, SrcSize,
787 SrcAccessRange, SrcOffset, SrcElemSize, (
char *)DstMem,
nullptr,
788 DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
789 std::move(DepEvents), OutEvent, OutEventImpl);
791 copyH2D(SYCLMemObj, (
char *)SrcMem,
nullptr, DimSrc, SrcSize,
792 SrcAccessRange, SrcOffset, SrcElemSize,
793 pi::cast<sycl::detail::pi::PiMem>(DstMem), std::move(TgtQueue),
794 DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
795 std::move(DepEvents), OutEvent, OutEventImpl);
798 copyD2H(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
799 std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
800 SrcElemSize, (
char *)DstMem,
nullptr, DimDst, DstSize,
801 DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
802 OutEvent, OutEventImpl);
804 copyD2D(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
805 std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
806 SrcElemSize, pi::cast<sycl::detail::pi::PiMem>(DstMem),
807 std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
808 DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
813 size_t PatternSize,
const unsigned char *Pattern,
816 unsigned int ElementSize,
817 std::vector<sycl::detail::pi::PiEvent> DepEvents,
820 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
821 assert(Queue &&
"Fill should be called only with a valid device queue");
823 const PluginPtr &Plugin = Queue->getPlugin();
825 if (SYCLMemObj->
getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
826 if (OutEventImpl !=
nullptr)
827 OutEventImpl->setHostEnqueueTime();
831 bool RangesUsable = (Dim <= 1) || (MemRange == AccRange);
834 bool OffsetUsable = (Dim <= 1) || (Offset ==
sycl::id<3>{0, 0, 0});
835 size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2];
837 if (RangesUsable && OffsetUsable) {
839 Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem),
840 Pattern, PatternSize, Offset[0] * ElementSize,
841 RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(),
848 "Not supported configuration of fill requested");
850 if (OutEventImpl !=
nullptr)
851 OutEventImpl->setHostEnqueueTime();
855 Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
856 &Offset[0], &AccRange[0], DepEvents.
size(), DepEvents.data(),
864 unsigned int ElementSize,
865 std::vector<sycl::detail::pi::PiEvent> DepEvents,
869 "Not supported configuration of map requested");
875 case access::mode::read:
882 case access::mode::atomic:
885 case access::mode::discard_write:
886 case access::mode::discard_read_write:
891 AccessOffset[0] *= ElementSize;
892 AccessRange[0] *= ElementSize;
895 assert(AccessOffset[0] == 0 &&
"Handle offset");
897 void *MappedPtr =
nullptr;
898 const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
899 const PluginPtr &Plugin = Queue->getPlugin();
901 pi::cast<sycl::detail::pi::PiMem>(Mem),
PI_FALSE, Flags,
902 AccessOffset[0], BytesToMap, DepEvents.size(),
903 DepEvents.data(), &OutEvent, &MappedPtr);
909 std::vector<sycl::detail::pi::PiEvent> DepEvents,
915 "Not supported configuration of unmap requested");
920 const PluginPtr &Plugin = Queue->getPlugin();
922 pi::cast<sycl::detail::pi::PiMem>(Mem), MappedPtr,
923 DepEvents.size(), DepEvents.data(), &OutEvent);
926 void MemoryManager::copy_usm(
const void *SrcMem,
QueueImplPtr SrcQueue,
927 size_t Len,
void *DstMem,
928 std::vector<sycl::detail::pi::PiEvent> DepEvents,
931 assert(SrcQueue &&
"USM copy must be called with a valid device queue");
933 if (!DepEvents.empty()) {
934 if (OutEventImpl !=
nullptr)
935 OutEventImpl->setHostEnqueueTime();
937 SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
943 if (!SrcMem || !DstMem)
945 "NULL pointer argument in memory copy operation.");
947 const PluginPtr &Plugin = SrcQueue->getPlugin();
948 if (OutEventImpl !=
nullptr)
949 OutEventImpl->setHostEnqueueTime();
951 SrcQueue->getHandleRef(),
952 PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(),
953 DepEvents.data(), OutEvent);
956 void MemoryManager::fill_usm(
void *Mem,
QueueImplPtr Queue,
size_t Length,
957 const std::vector<unsigned char> &Pattern,
958 std::vector<sycl::detail::pi::PiEvent> DepEvents,
961 assert(Queue &&
"USM fill must be called with a valid device queue");
963 if (!DepEvents.empty()) {
964 if (OutEventImpl !=
nullptr)
965 OutEventImpl->setHostEnqueueTime();
967 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
974 "NULL pointer argument in memory fill operation.");
975 if (OutEventImpl !=
nullptr)
976 OutEventImpl->setHostEnqueueTime();
977 const PluginPtr &Plugin = Queue->getPlugin();
979 Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length,
980 DepEvents.size(), DepEvents.data(), OutEvent);
983 void MemoryManager::prefetch_usm(
985 std::vector<sycl::detail::pi::PiEvent> DepEvents,
988 assert(Queue &&
"USM prefetch must be called with a valid device queue");
989 const PluginPtr &Plugin = Queue->getPlugin();
990 if (OutEventImpl !=
nullptr)
991 OutEventImpl->setHostEnqueueTime();
994 DepEvents.size(), DepEvents.data(), OutEvent);
997 void MemoryManager::advise_usm(
999 std::vector<sycl::detail::pi::PiEvent> ,
1002 assert(Queue &&
"USM advise must be called with a valid device queue");
1003 const PluginPtr &Plugin = Queue->getPlugin();
1004 if (OutEventImpl !=
nullptr)
1005 OutEventImpl->setHostEnqueueTime();
1007 Length, Advice, OutEvent);
1010 void MemoryManager::copy_2d_usm(
1011 const void *SrcMem,
size_t SrcPitch,
QueueImplPtr Queue,
void *DstMem,
1012 size_t DstPitch,
size_t Width,
size_t Height,
1013 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1016 assert(Queue &&
"USM copy 2d must be called with a valid device queue");
1017 if (Width == 0 || Height == 0) {
1019 if (!DepEvents.empty()) {
1020 if (OutEventImpl !=
nullptr)
1021 OutEventImpl->setHostEnqueueTime();
1023 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1028 if (!DstMem || !SrcMem)
1030 "NULL pointer argument in 2D memory copy operation.");
1032 const PluginPtr &Plugin = Queue->getPlugin();
1034 pi_bool SupportsUSMMemcpy2D =
false;
1036 Queue->getContextImplPtr()->getHandleRef(),
1038 &SupportsUSMMemcpy2D,
nullptr);
1040 if (SupportsUSMMemcpy2D) {
1041 if (OutEventImpl !=
nullptr)
1042 OutEventImpl->setHostEnqueueTime();
1045 Queue->getHandleRef(),
PI_FALSE, DstMem, DstPitch, SrcMem,
1046 SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1052 context Ctx = createSyclObjFromImpl<context>(Queue->getContextImplPtr());
1056 SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
1058 DstAllocType == usm::alloc::unknown || DstAllocType == usm::alloc::host;
1059 assert((SrcIsHost || DstIsHost) &&
"In fallback path for copy_2d_usm either "
1060 "source or destination must be on host.");
1064 std::vector<OwnedPiEvent> CopyEventsManaged;
1065 CopyEventsManaged.reserve(Height);
1067 std::vector<sycl::detail::pi::PiEvent> CopyEvents(Height);
1068 if (OutEventImpl !=
nullptr)
1069 OutEventImpl->setHostEnqueueTime();
1070 for (
size_t I = 0; I < Height; ++I) {
1071 char *DstItBegin =
static_cast<char *
>(DstMem) + I * DstPitch;
1072 const char *SrcItBegin =
static_cast<const char *
>(SrcMem) + I * SrcPitch;
1074 Queue->getHandleRef(),
PI_FALSE, DstItBegin, SrcItBegin,
1075 Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I);
1076 CopyEventsManaged.emplace_back(CopyEvents[I], Plugin,
1079 if (OutEventImpl !=
nullptr)
1080 OutEventImpl->setHostEnqueueTime();
1083 Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent);
1086 void MemoryManager::fill_2d_usm(
1087 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1088 const std::vector<unsigned char> &Pattern,
1089 std::vector<sycl::detail::pi::PiEvent> DepEvents,
1092 assert(Queue &&
"USM fill 2d must be called with a valid device queue");
1093 if (Width == 0 || Height == 0) {
1095 if (!DepEvents.empty()) {
1096 if (OutEventImpl !=
nullptr)
1097 OutEventImpl->setHostEnqueueTime();
1099 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1106 "NULL pointer argument in 2D memory fill operation.");
1107 if (OutEventImpl !=
nullptr)
1108 OutEventImpl->setHostEnqueueTime();
1109 const PluginPtr &Plugin = Queue->getPlugin();
1111 Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(),
1112 Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1115 void MemoryManager::memset_2d_usm(
1116 void *DstMem,
QueueImplPtr Queue,
size_t Pitch,
size_t Width,
size_t Height,
1117 char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1120 assert(Queue &&
"USM memset 2d must be called with a valid device queue");
1121 if (Width == 0 || Height == 0) {
1123 if (!DepEvents.empty()) {
1124 if (OutEventImpl !=
nullptr)
1125 OutEventImpl->setHostEnqueueTime();
1127 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1135 "NULL pointer argument in 2D memory memset operation.");
1136 if (OutEventImpl !=
nullptr)
1137 OutEventImpl->setHostEnqueueTime();
1138 const PluginPtr &Plugin = Queue->getPlugin();
1140 Queue->getHandleRef(), DstMem, Pitch,
static_cast<int>(Value), Width,
1141 Height, DepEvents.size(), DepEvents.data(), OutEvent);
1147 size_t NumBytes,
size_t Offset,
const void *Src,
1148 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1152 "Copy to device global USM must be called with a valid device queue");
1156 void *Dest = DeviceGlobalUSM.
getPtr();
1164 std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1165 const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1166 ZIEvent ? AuxDepEventsStorage : DepEvents;
1171 AuxDepEventsStorage = DepEvents;
1172 AuxDepEventsStorage.push_back(ZIEvent.
GetEvent());
1175 MemoryManager::copy_usm(Src, Queue, NumBytes,
1176 reinterpret_cast<char *
>(Dest) + Offset,
1177 ActualDepEvents, OutEvent, OutEventImpl);
1182 size_t NumBytes,
size_t Offset,
void *Dest,
1183 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1190 void *Src = DeviceGlobalUSM.
getPtr();
1198 std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1199 const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1200 ZIEvent ? AuxDepEventsStorage : DepEvents;
1205 AuxDepEventsStorage = DepEvents;
1206 AuxDepEventsStorage.push_back(ZIEvent.
GetEvent());
1209 MemoryManager::copy_usm(
reinterpret_cast<const char *
>(Src) + Offset, Queue,
1210 NumBytes, Dest, ActualDepEvents, OutEvent,
1218 "device_global is not device image scope decorated.");
1223 "More than one image exists with the device_global.");
1228 "No image exists with the device_global.");
1231 device Device = Queue->get_device();
1233 std::optional<sycl::detail::pi::PiProgram> CachedProgram =
1234 ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry);
1236 return *CachedProgram;
1239 auto Context = createSyclObjFromImpl<context>(ContextImpl);
1251 size_t NumBytes,
size_t Offset,
const void *Src,
1252 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1256 "Direct copy to device global must be called with a valid device queue");
1259 const PluginPtr &Plugin = Queue->getPlugin();
1261 Queue->getHandleRef(), Program, DeviceGlobalEntry->
MUniqueId.c_str(),
1262 false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(),
1268 size_t NumBytes,
size_t Offset,
void *Dest,
1269 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1271 assert(Queue &&
"Direct copy from device global must be called with a valid "
1275 const PluginPtr &Plugin = Queue->getPlugin();
1277 Queue->getHandleRef(), Program, DeviceGlobalEntry->
MUniqueId.c_str(),
1278 false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(),
1282 void MemoryManager::copy_to_device_global(
1283 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1284 size_t NumBytes,
size_t Offset,
const void *SrcMem,
1285 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1289 detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1293 "Invalid copy operation for device_global.");
1295 "Copy to device_global is out of bounds.");
1297 if (IsDeviceImageScoped)
1299 DepEvents, OutEvent);
1302 OutEvent, OutEventImpl);
1305 void MemoryManager::copy_from_device_global(
1306 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
QueueImplPtr Queue,
1307 size_t NumBytes,
size_t Offset,
void *DstMem,
1308 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1312 detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1316 "Invalid copy operation for device_global.");
1318 "Copy from device_global is out of bounds.");
1320 if (IsDeviceImageScoped)
1322 DepEvents, OutEvent);
1325 DepEvents, OutEvent, OutEventImpl);
1329 void MemoryManager::ext_oneapi_copyD2D_cmd_buffer(
1334 unsigned int SrcElemSize,
void *DstMem,
unsigned int DimDst,
1337 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1339 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1340 (void)DstAccessRange;
1342 const PluginPtr &Plugin = Context->getPlugin();
1349 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1350 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1351 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
1352 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1353 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1355 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1357 "Images are not supported in Graphs");
1360 if (1 == DimDst && 1 == DimSrc) {
1362 CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1363 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), SrcXOffBytes,
1364 DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(),
1370 size_t SrcRowPitch = SrcSzWidthBytes;
1371 size_t SrcSlicePitch = (DimSrc <= 1)
1373 : SrcSzWidthBytes * SrcSize[SrcPos.
YTerm];
1374 size_t DstRowPitch = DstSzWidthBytes;
1375 size_t DstSlicePitch = (DimDst <= 1)
1377 : DstSzWidthBytes * DstSize[DstPos.
YTerm];
1380 SrcOffset[SrcPos.
ZTerm]};
1382 DstOffset[DstPos.
ZTerm]};
1384 SrcAccessRange[SrcPos.
YTerm],
1385 SrcAccessRange[SrcPos.
ZTerm]};
1388 CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1389 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), &SrcOrigin,
1390 &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch,
1391 DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint);
1395 void MemoryManager::ext_oneapi_copyD2H_cmd_buffer(
1400 unsigned int SrcElemSize,
char *DstMem,
unsigned int DimDst,
1402 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1404 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1406 const PluginPtr &Plugin = Context->getPlugin();
1413 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1414 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1415 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
1416 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1417 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1419 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1421 "Images are not supported in Graphs");
1424 if (1 == DimDst && 1 == DimSrc) {
1428 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1429 SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes,
1430 Deps.size(), Deps.data(), OutSyncPoint);
1432 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1435 "Device-to-host buffer copy command not supported by graph backend");
1437 Plugin->checkPiResult(Result);
1440 size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1441 size_t BufferSlicePitch =
1442 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
1443 size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1444 size_t HostSlicePitch =
1445 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
1448 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
1450 DstOffset[DstPos.
ZTerm]};
1452 SrcAccessRange[SrcPos.
YTerm],
1453 SrcAccessRange[SrcPos.
ZTerm]};
1458 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1459 &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1460 BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(),
1461 Deps.data(), OutSyncPoint);
1462 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1465 "Device-to-host buffer copy command not supported by graph backend");
1467 Plugin->checkPiResult(Result);
1472 void MemoryManager::ext_oneapi_copyH2D_cmd_buffer(
1476 sycl::id<3> SrcOffset,
unsigned int SrcElemSize,
void *DstMem,
1479 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1481 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1483 const PluginPtr &Plugin = Context->getPlugin();
1490 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
1491 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
1492 size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.
XTerm] * DstElemSize;
1493 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
1494 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
1496 if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1498 "Images are not supported in Graphs");
1501 if (1 == DimDst && 1 == DimSrc) {
1505 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1506 DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes,
1507 Deps.size(), Deps.data(), OutSyncPoint);
1509 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1512 "Host-to-device buffer copy command not supported by graph backend");
1514 Plugin->checkPiResult(Result);
1517 size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1518 size_t BufferSlicePitch =
1519 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
1520 size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1521 size_t HostSlicePitch =
1522 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
1525 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
1527 SrcOffset[SrcPos.
ZTerm]};
1529 DstAccessRange[DstPos.
YTerm],
1530 DstAccessRange[DstPos.
ZTerm]};
1535 sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1536 &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1537 BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(),
1538 Deps.data(), OutSyncPoint);
1540 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1543 "Host-to-device buffer copy command not supported by graph backend");
1545 Plugin->checkPiResult(Result);
1550 void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
1553 void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1555 if (!SrcMem || !DstMem)
1557 "NULL pointer argument in memory copy operation.");
1559 const PluginPtr &Plugin = Context->getPlugin();
1562 CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(),
1564 if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1567 "USM copy command not supported by graph backend");
1569 Plugin->checkPiResult(Result);
1573 void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
1576 size_t Len,
const std::vector<unsigned char> &Pattern,
1577 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1582 "NULL pointer argument in memory fill operation.");
1584 const PluginPtr &Plugin = Context->getPlugin();
1587 CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(),
1588 Deps.data(), OutSyncPoint);
1591 void MemoryManager::ext_oneapi_fill_cmd_buffer(
1594 void *Mem,
size_t PatternSize,
const unsigned char *Pattern,
1596 sycl::id<3> AccessOffset,
unsigned int ElementSize,
1597 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1599 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
1601 const PluginPtr &Plugin = Context->getPlugin();
1602 if (SYCLMemObj->
getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
1604 "Images are not supported in Graphs");
1609 bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
1612 bool OffsetUsable = (Dim <= 1) || (AccessOffset ==
sycl::id<3>{0, 0, 0});
1613 size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];
1615 if (RangesUsable && OffsetUsable) {
1617 CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
1618 PatternSize, AccessOffset[0] * ElementSize,
1619 RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
1625 "Not supported configuration of fill requested");
1628 void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer(
1631 size_t Length, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1633 const PluginPtr &Plugin = Context->getPlugin();
1636 Deps.data(), OutSyncPoint);
1639 void MemoryManager::ext_oneapi_advise_usm_cmd_buffer(
1643 std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1645 const PluginPtr &Plugin = Context->getPlugin();
1647 CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(),
1651 void MemoryManager::copy_image_bindless(
1661 const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1664 "Copy image bindless must be called with a valid device queue");
1671 "Invalid flags passed to copy_image_bindless.");
1675 "NULL pointer argument in bindless image copy operation.");
1679 Queue->getHandleRef(), Dst, Src, &SrcImageDesc, &DestImageDesc,
1680 &SrcImageFormat, &DestImageFormat, Flags, &SrcOffset, &DstOffset,
1681 &CopyExtent, DepEvents.size(), DepEvents.data(), OutEvent);
The context class represents a SYCL context on which kernel functions may be executed.
static void * allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range< 3 > Range, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const sycl::property_list &PropsList)
static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList)
static void * allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList)
static void * allocateMemBuffer(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &InteropEvent)=0
static void bufferAssociateNotification(const void *, const void *)
static void bufferReleaseNotification(const void *, const void *)
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
A unique identifier of an item in an index space.
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
PropT get_property() const
::pi_mem_flags PiMemFlags
::pi_ext_sync_point PiExtSyncPoint
static void memcpyToDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
static void memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent >, sycl::detail::pi::PiEvent &, const detail::EventImplPtr &)
void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
exception set_pi_error(exception &&e, pi_int32 pi_err)
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static sycl::detail::pi::PiProgram getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
static sycl::detail::pi::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
void unmap(const void *Ptr, size_t NumBytes, const context &SyclContext)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
constexpr mode_tag_t< access_mode::read_write > read_write
class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
uintptr_t pi_native_handle
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, const void *pattern, size_t patternSize, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Fill API.
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_bitfield pi_mem_properties
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable.
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
pi_result piextCommandBufferMemBufferFill(pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer fill command to the command-buffer.
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_BUFFER_CREATE_TYPE_REGION
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D fill API.
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
pi_result piMemRelease(pi_mem mem)
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
constexpr pi_map_flags PI_MAP_WRITE
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
constexpr pi_map_flags PI_MAP_READ
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
pi_result piextMemImageCopy(pi_queue queue, void *dst_ptr, const void *src_ptr, const pi_image_desc *src_image_desc, const pi_image_desc *dst_image_desc, const pi_image_format *src_image_format, const pi_image_format *dst_image_format, const pi_image_copy_flags flags, pi_image_offset src_offset, pi_image_offset dst_offset, pi_image_region copy_extent, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to copy image data Host to Device or Device to Host.
pi_result piextCommandBufferFillUSM(pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM fill command to the command-buffer.
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
pi_result piextCommandBufferAdviseUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t length, pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Advise command to the command-buffer.
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
pi_result piextCommandBufferPrefetchUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Prefetch command to the command-buffer.
pi_result piEventRetain(pi_event event)
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL
bool MIsDeviceImageScopeDecorated
std::set< std::uintptr_t > MImageIdentifiers
DeviceGlobalUSMMem & getOrAllocateDeviceGlobalUSM(const std::shared_ptr< queue_impl > &QueueImpl)
std::uint32_t MDeviceGlobalTSize
std::unordered_set< RTDeviceBinaryImage * > MImages
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
void *const & getPtr() const noexcept
sycl::detail::pi::PiEvent GetEvent()