21 #ifdef XPTI_ENABLE_INSTRUMENTATION
22 #include <xpti/xpti_data_types.h>
23 #include <xpti/xpti_trace_framework.hpp>
30 #ifdef XPTI_ENABLE_INSTRUMENTATION
31 uint8_t GMemAllocStreamID;
32 xpti::trace_event_data_t *GMemAllocEvent;
40 uint64_t CorrelationID = 0;
41 #ifdef XPTI_ENABLE_INSTRUMENTATION
42 if (xptiTraceEnabled()) {
43 xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 , AllocSize,
46 CorrelationID = xptiGetUniqueId();
47 xptiNotifySubscribers(
49 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_begin),
50 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
57 size_t AllocSize,
size_t GuardZone,
58 uint64_t CorrelationID) {
64 #ifdef XPTI_ENABLE_INSTRUMENTATION
65 if (xptiTraceEnabled()) {
66 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
68 xptiNotifySubscribers(
70 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_alloc_end),
71 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
79 uint64_t CorrelationID = 0;
80 #ifdef XPTI_ENABLE_INSTRUMENTATION
81 if (xptiTraceEnabled()) {
82 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
85 CorrelationID = xptiGetUniqueId();
86 xptiNotifySubscribers(
88 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_begin),
89 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
96 uint64_t CorrelationID) {
100 #ifdef XPTI_ENABLE_INSTRUMENTATION
101 if (xptiTraceEnabled()) {
102 xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 ,
105 xptiNotifySubscribers(
107 static_cast<uint16_t
>(xpti::trace_point_type_t::mem_release_end),
108 GMemAllocEvent,
nullptr, CorrelationID, &MemAlloc);
116 if (!Events.empty()) {
118 std::vector<RT::PiEvent> PiEvents(Events.size());
119 std::transform(Events.begin(), Events.end(), PiEvents.begin(),
121 return EventImpl->getHandleRef();
130 #ifdef XPTI_ENABLE_INSTRUMENTATION
135 #ifdef XPTI_ENABLE_INSTRUMENTATION
138 xpti::utils::finally _{[&] {
140 uintptr_t MemObjID = (uintptr_t)(*RetMem);
159 #ifdef XPTI_ENABLE_INSTRUMENTATION
162 uintptr_t MemObjID = (uintptr_t)(Mem);
165 if (xptiTraceEnabled()) {
168 Ptr = (uintptr_t)(PtrHandle);
173 #ifdef XPTI_ENABLE_INSTRUMENTATION
175 xpti::utils::finally _{
187 #ifdef XPTI_ENABLE_INSTRUMENTATION
189 uintptr_t MemObjID = (uintptr_t)(Buffer);
193 #ifdef XPTI_ENABLE_INSTRUMENTATION
195 xpti::utils::finally _{[&] {
201 Queue, Buffer, Blocking, Flags, Offset, Size, NumEvents, WaitList,
209 #ifdef XPTI_ENABLE_INSTRUMENTATION
211 uintptr_t MemObjID = (uintptr_t)(Mem);
212 uintptr_t Ptr = (uintptr_t)(MappedPtr);
216 #ifdef XPTI_ENABLE_INSTRUMENTATION
218 xpti::utils::finally _{[&] {
236 std::vector<EventImplPtr> DepEvents,
242 XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
243 MemObj->
releaseMem(TargetContext, MemAllocation);
257 if (UserPtr == MemAllocation) {
262 if (TargetContext->is_host()) {
272 bool InitFromUserData,
void *HostPtr,
273 std::vector<EventImplPtr> DepEvents,
280 return MemObj->
allocateMem(TargetContext, InitFromUserData, HostPtr,
// MemoryManager::allocateHostMemory -- host-side allocation path.
// NOTE: this listing omits several interior lines of the definition, so only
// the statements that are actually visible are described here.
//
// Visible parameters: MemObj, UserPtr (may be null -- it is null-checked
// below), HostPtrReadOnly, Size.
293 void *MemoryManager::allocateHostMemory(
SYCLMemObjI *MemObj,
void *UserPtr,
294 bool HostPtrReadOnly,
size_t Size,
// Case 1: a user pointer was supplied and it is not read-only. The action
// taken for this case is on a line not shown here.
297 if (UserPtr && HostPtrReadOnly ==
false)
// Case 2: a user pointer was supplied but it is read-only: Size bytes are
// copied out of UserPtr into NewMem. NewMem is assigned on lines not shown --
// presumably a fresh allocation; TODO confirm.
303 if (UserPtr && HostPtrReadOnly ==
true)
304 std::memcpy((
char *)NewMem, (
char *)UserPtr, Size);
// MemoryManager::allocateInteropMemObject -- visible portion only; the
// parameter list and the tail of the body are not part of this listing.
308 void *MemoryManager::allocateInteropMemObject(
// The void cast presumably keeps builds where assert() is compiled out free of
// unused-parameter warnings for InteropContext (TODO confirm).
313 (void)InteropContext;
// assert-only check (inactive when NDEBUG is defined) that the target context
// is the interop context.
315 assert(TargetContext == InteropContext &&
"Expected matching contexts");
// Store the interop event's handle in OutEventToWait (presumably an
// out-parameter; its declaration is not shown here).
316 OutEventToWait = InteropEvent->getHandleRef();
// What is done for a non-null handle is on lines not shown in this listing.
319 if (
nullptr != OutEventToWait) {
327 bool HostPtrReadOnly) {
338 void *UserPtr,
bool HostPtrReadOnly,
348 CreationFlags, &Format, &Desc,
355 bool HostPtrReadOnly,
const size_t Size,
367 if (TargetContext->isBufferLocationSupported()) {
370 .get_buffer_location();
374 CreationFlags, Size, UserPtr, &NewMem, props);
378 Size, UserPtr, &NewMem,
nullptr);
384 bool HostPtrReadOnly,
size_t Size,
390 if (TargetContext->is_host())
392 allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
393 else if (UserPtr && InteropContext)
395 allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
396 InteropContext, PropsList, OutEventToWait);
398 MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
400 XPTIRegistry::bufferAssociateNotification(MemObj, MemPtr);
// MemoryManager::allocateMemImage -- picks one of three allocation helpers and
// returns its result. The parameter list and the trailing arguments of two of
// the calls are missing from this listing.
//
// Selection order, as visible below:
//   1. TargetContext->is_host()      -> allocateHostMemory(...)
//   2. UserPtr && InteropContext     -> allocateInteropMemObject(...)
//   3. otherwise                     -> allocateImageObject(...)
404 void *MemoryManager::allocateMemImage(
410 if (TargetContext->is_host())
411 return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
413 if (UserPtr && InteropContext)
414 return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
415 InteropContext, PropsList, OutEventToWait);
416 return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
421 void *ParentMemObj,
size_t ElemSize,
423 std::vector<EventImplPtr> DepEvents,
428 if (TargetContext->is_host())
429 return static_cast<void *
>(
static_cast<char *
>(ParentMemObj) + Offset);
431 size_t SizeInBytes = ElemSize;
432 for (
size_t I = 0; I < 3; ++I)
433 SizeInBytes *= Range[I];
443 throw invalid_object_error(
444 "Specified offset of the sub-buffer being constructed is not a "
445 "multiple of the memory base address alignment",
469 if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
491 unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
493 (void)SrcAccessRange;
494 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
496 const RT::PiQueue Queue = TgtQueue->getHandleRef();
504 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
505 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
506 size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.
XTerm] * DstElemSize;
507 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
508 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
510 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
511 if (1 == DimDst && 1 == DimSrc) {
514 CL_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
515 SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
517 size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
518 size_t BufferSlicePitch =
519 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
520 size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
521 size_t HostSlicePitch =
522 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
525 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
527 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
529 DstAccessRange[DstPos.
YTerm],
530 DstAccessRange[DstPos.
ZTerm]};
534 CL_FALSE, &BufferOffset, &HostOffset, &RectRegion,
535 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
536 SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
539 size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
540 size_t InputSlicePitch =
541 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
544 DstOffset[DstPos.YTerm],
545 DstOffset[DstPos.ZTerm]};
547 DstAccessRange[DstPos.YTerm],
548 DstAccessRange[DstPos.ZTerm]};
552 CL_FALSE, &Origin, &Region, InputRowPitch,
553 InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
563 unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
565 (void)DstAccessRange;
566 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
568 const RT::PiQueue Queue = SrcQueue->getHandleRef();
582 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
583 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
584 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
585 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
586 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
588 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
589 if (1 == DimDst && 1 == DimSrc) {
592 CL_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
593 DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
595 size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
596 size_t BufferSlicePitch =
597 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.
YTerm] : 0;
598 size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
599 size_t HostSlicePitch =
600 (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.
YTerm] : 0;
603 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
605 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
607 SrcAccessRange[SrcPos.
YTerm],
608 SrcAccessRange[SrcPos.
ZTerm]};
612 CL_FALSE, &BufferOffset, &HostOffset, &RectRegion,
613 BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
614 DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
617 size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
619 (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
622 SrcOffset[SrcPos.YTerm],
623 SrcOffset[SrcPos.ZTerm]};
625 SrcAccessRange[SrcPos.YTerm],
626 SrcAccessRange[SrcPos.ZTerm]};
629 Queue, SrcMem, CL_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
630 DepEvents.size(), DepEvents.data(), &OutEvent);
640 std::vector<RT::PiEvent> DepEvents,
RT::PiEvent &OutEvent) {
641 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
643 const RT::PiQueue Queue = SrcQueue->getHandleRef();
651 size_t DstXOffBytes = DstOffset[DstPos.
XTerm] * DstElemSize;
652 size_t SrcXOffBytes = SrcOffset[SrcPos.
XTerm] * SrcElemSize;
653 size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.
XTerm] * SrcElemSize;
654 size_t DstSzWidthBytes = DstSize[DstPos.
XTerm] * DstElemSize;
655 size_t SrcSzWidthBytes = SrcSize[SrcPos.
XTerm] * SrcElemSize;
657 if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
658 if (1 == DimDst && 1 == DimSrc) {
660 Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
661 SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
667 size_t SrcRowPitch = SrcSzWidthBytes;
668 size_t SrcSlicePitch = (DimSrc <= 1)
670 : SrcSzWidthBytes * SrcSize[SrcPos.
YTerm];
671 size_t DstRowPitch = DstSzWidthBytes;
672 size_t DstSlicePitch = (DimDst <= 1)
674 : DstSzWidthBytes * DstSize[DstPos.
YTerm];
677 SrcXOffBytes, SrcOffset[SrcPos.
YTerm], SrcOffset[SrcPos.
ZTerm]};
679 DstXOffBytes, DstOffset[DstPos.
YTerm], DstOffset[DstPos.
ZTerm]};
681 SrcAccessRange[SrcPos.
YTerm],
682 SrcAccessRange[SrcPos.
ZTerm]};
685 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
686 SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
687 DepEvents.data(), &OutEvent);
691 SrcOffset[SrcPos.
YTerm],
692 SrcOffset[SrcPos.
ZTerm]};
694 DstOffset[DstPos.
YTerm],
695 DstOffset[DstPos.
ZTerm]};
697 SrcAccessRange[SrcPos.
YTerm],
698 SrcAccessRange[SrcPos.
ZTerm]};
701 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
702 DepEvents.size(), DepEvents.data(), &OutEvent);
712 unsigned int DstElemSize, std::vector<RT::PiEvent>,
714 if ((DimSrc != 1 || DimDst != 1) &&
715 (SrcOffset !=
id<3>{0, 0, 0} || DstOffset !=
id<3>{0, 0, 0} ||
716 SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
717 throw runtime_error(
"Not supported configuration of memcpy requested",
721 SrcMem += SrcOffset[0] * SrcElemSize;
722 DstMem += DstOffset[0] * DstElemSize;
724 if (SrcMem == DstMem)
728 SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
741 unsigned int DstElemSize,
742 std::vector<RT::PiEvent> DepEvents,
745 if (SrcQueue->is_host()) {
746 if (TgtQueue->is_host())
747 copyH2H(SYCLMemObj, (
char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
748 SrcAccessRange, SrcOffset, SrcElemSize, (
char *)DstMem,
749 std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
750 DstElemSize, std::move(DepEvents), OutEvent);
753 copyH2D(SYCLMemObj, (
char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
754 SrcAccessRange, SrcOffset, SrcElemSize,
755 pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
756 DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
759 if (TgtQueue->is_host())
760 copyD2H(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
761 DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
762 (
char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
763 DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
766 copyD2D(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
767 DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
768 pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
769 DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
775 size_t PatternSize,
const char *Pattern,
778 std::vector<RT::PiEvent> DepEvents,
780 assert(SYCLMemObj &&
"The SYCLMemObj is nullptr");
783 if (SYCLMemObj->
getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
786 Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, PatternSize,
787 Offset[0] * ElementSize, Range[0] * ElementSize, DepEvents.
size(),
788 DepEvents.data(), &OutEvent);
791 throw runtime_error(
"Not supported configuration of fill requested",
795 Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, &Offset[0],
796 &Range[0], DepEvents.
size(), DepEvents.data(), &OutEvent);
803 unsigned int ElementSize,
804 std::vector<RT::PiEvent> DepEvents,
806 if (Queue->is_host()) {
807 throw runtime_error(
"Not supported configuration of map requested",
813 switch (AccessMode) {
814 case access::mode::read:
820 case access::mode::read_write:
821 case access::mode::atomic:
824 case access::mode::discard_write:
825 case access::mode::discard_read_write:
830 AccessOffset[0] *= ElementSize;
831 AccessRange[0] *= ElementSize;
834 assert(AccessOffset[0] == 0 &&
"Handle offset");
836 void *MappedPtr =
nullptr;
837 const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
840 CL_FALSE, Flags, AccessOffset[0], BytesToMap,
841 DepEvents.size(), DepEvents.data(), &OutEvent, &MappedPtr);
846 void *MappedPtr, std::vector<RT::PiEvent> DepEvents,
854 memUnmapHelper(Plugin, Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem),
855 MappedPtr, DepEvents.size(), DepEvents.data(), &OutEvent);
// MemoryManager::copy_usm -- overload that passes `OutEvent` on to its callees
// as-is (no address-of). Interior lines, including the names of the functions
// being called, are missing from this listing; only what is visible is
// described.
//
// Visible behaviour:
//  - A branch is taken when Context is not a host context and DepEvents is
//    non-empty; the line that follows passes the queue handle and the
//    dependency list (size + data) to a call whose name is not shown.
//  - Throws runtime_error when SrcMem or DstMem is null.
//  - A later call receives SrcMem, Len, the dependency list and OutEvent
//    (callee name and leading arguments not shown).
858 void MemoryManager::copy_usm(
const void *SrcMem,
QueueImplPtr SrcQueue,
859 size_t Len,
void *DstMem,
860 std::vector<RT::PiEvent> DepEvents,
865 if (!Context.
is_host() && !DepEvents.empty()) {
867 SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
873 if (!SrcMem || !DstMem)
874 throw runtime_error(
"NULL pointer argument in memory copy operation.",
883 SrcMem, Len, DepEvents.size(),
884 DepEvents.data(), OutEvent);
// MemoryManager::fill_usm -- overload that passes `OutEvent` on to its callees
// as-is (no address-of). Interior lines, including several conditions and the
// names of the functions being called, are missing from this listing.
//
// Visible behaviour:
//  - A branch is taken when Context is not a host context and DepEvents is
//    non-empty; the line that follows passes the queue handle, the dependency
//    list and OutEvent to a call whose name is not shown.
//  - runtime_error("NULL pointer argument in memory fill operation.", ...) is
//    thrown under a condition that is not shown (presumably a null Mem --
//    TODO confirm).
//  - One path fills the memory directly with std::memset(Mem, Pattern, Length);
//    the guarding condition is not shown (presumably the host-context case --
//    TODO confirm).
//  - Another path hands the queue handle, Mem, Pattern, Length, the dependency
//    list and OutEvent to a call whose name is not shown.
888 void MemoryManager::fill_usm(
void *Mem,
QueueImplPtr Queue,
size_t Length,
889 int Pattern, std::vector<RT::PiEvent> DepEvents,
894 if (!Context.
is_host() && !DepEvents.empty()) {
896 Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
902 throw runtime_error(
"NULL pointer argument in memory fill operation.",
906 std::memset(Mem, Pattern, Length);
910 Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
911 DepEvents.data(), OutEvent);
// MemoryManager::prefetch_usm -- overload that passes `OutEvent` on as-is (no
// address-of). Almost the entire body is missing from this listing; the only
// visible statement fragment is the tail of a call that receives the
// dependency list (size + data) and OutEvent.
915 void MemoryManager::prefetch_usm(
void *Mem,
QueueImplPtr Queue,
size_t Length,
916 std::vector<RT::PiEvent> DepEvents,
926 DepEvents.size(), DepEvents.data(), OutEvent);
932 std::vector<RT::PiEvent> ,
939 Length, Advice, OutEvent);
// MemoryManager::copy_usm -- thin forwarding overload.
// Calls the copy_usm overload that takes the output event by pointer, passing
// every argument through unchanged and supplying &OutEvent. OutEvent itself is
// declared on a signature line that is not part of this listing.
//
// NOTE(review): Queue and DepEvents are by-value parameters here and are
// passed on as plain lvalues (no std::move), so each is copied again if the
// callee also takes it by value.
944 void MemoryManager::copy_usm(
const void *SrcMem,
QueueImplPtr Queue,
size_t Len,
945 void *DstMem, std::vector<RT::PiEvent> DepEvents,
947 copy_usm(SrcMem, Queue, Len, DstMem, DepEvents, &OutEvent);
// MemoryManager::fill_usm -- thin forwarding overload.
// Calls the fill_usm overload that takes the output event by pointer, passing
// every argument through unchanged and supplying &OutEvent. OutEvent itself is
// declared on a signature line that is not part of this listing.
//
// NOTE(review): Queue and DepEvents are by-value parameters here and are
// passed on as plain lvalues (no std::move), so each is copied again if the
// callee also takes it by value.
951 void MemoryManager::fill_usm(
void *DstMem,
QueueImplPtr Queue,
size_t Len,
952 int Pattern, std::vector<RT::PiEvent> DepEvents,
954 fill_usm(DstMem, Queue, Len, Pattern, DepEvents, &OutEvent);
// MemoryManager::prefetch_usm -- thin forwarding overload.
// Calls the prefetch_usm overload that takes the output event by pointer,
// passing every argument through unchanged and supplying &OutEvent. OutEvent
// itself is declared on a signature line that is not part of this listing.
//
// NOTE(review): Queue and DepEvents are by-value parameters here and are
// passed on as plain lvalues (no std::move), so each is copied again if the
// callee also takes it by value.
958 void MemoryManager::prefetch_usm(
void *Ptr,
QueueImplPtr Queue,
size_t Len,
959 std::vector<RT::PiEvent> DepEvents,
961 prefetch_usm(Ptr, Queue, Len, DepEvents, &OutEvent);
// MemoryManager::advise_usm -- thin forwarding overload.
// Calls the advise_usm overload that takes the output event by pointer,
// passing every argument through unchanged and supplying &OutEvent. The Advice
// and OutEvent parameters are declared on signature lines that are not part of
// this listing.
//
// NOTE(review): Queue and DepEvents are by-value parameters here and are
// passed on as plain lvalues (no std::move), so each is copied again if the
// callee also takes it by value.
965 void MemoryManager::advise_usm(
const void *Ptr,
QueueImplPtr Queue,
size_t Len,
967 std::vector<RT::PiEvent> DepEvents,
969 advise_usm(Ptr, Queue, Len, Advice, DepEvents, &OutEvent);