DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_manager.cpp
Go to the documentation of this file.
1 //==-------------- memory_manager.cpp --------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <detail/context_impl.hpp>
11 #include <detail/event_impl.hpp>
13 #include <detail/queue_impl.hpp>
14 #include <detail/xpti_registry.hpp>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cstring>
19 #include <vector>
20 
21 #ifdef XPTI_ENABLE_INSTRUMENTATION
22 #include <xpti/xpti_data_types.h>
23 #include <xpti/xpti_trace_framework.hpp>
24 #endif
25 
27 namespace sycl {
28 namespace detail {
29 
30 #ifdef XPTI_ENABLE_INSTRUMENTATION
31 uint8_t GMemAllocStreamID;
32 xpti::trace_event_data_t *GMemAllocEvent;
33 #endif
34 
35 uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize,
36  size_t GuardZone) {
37  (void)ObjHandle;
38  (void)AllocSize;
39  (void)GuardZone;
40  uint64_t CorrelationID = 0;
41 #ifdef XPTI_ENABLE_INSTRUMENTATION
42  if (xptiTraceEnabled()) {
43  xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 /* alloc ptr */, AllocSize,
44  GuardZone};
45 
46  CorrelationID = xptiGetUniqueId();
47  xptiNotifySubscribers(
48  GMemAllocStreamID,
49  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_begin),
50  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
51  }
52 #endif
53  return CorrelationID;
54 }
55 
56 void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
57  size_t AllocSize, size_t GuardZone,
58  uint64_t CorrelationID) {
59  (void)ObjHandle;
60  (void)AllocPtr;
61  (void)AllocSize;
62  (void)GuardZone;
63  (void)CorrelationID;
64 #ifdef XPTI_ENABLE_INSTRUMENTATION
65  if (xptiTraceEnabled()) {
66  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
67 
68  xptiNotifySubscribers(
69  GMemAllocStreamID,
70  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_end),
71  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
72  }
73 #endif
74 }
75 
76 uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr) {
77  (void)ObjHandle;
78  (void)AllocPtr;
79  uint64_t CorrelationID = 0;
80 #ifdef XPTI_ENABLE_INSTRUMENTATION
81  if (xptiTraceEnabled()) {
82  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
83  0 /* guard zone */};
84 
85  CorrelationID = xptiGetUniqueId();
86  xptiNotifySubscribers(
87  GMemAllocStreamID,
88  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_begin),
89  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
90  }
91 #endif
92  return CorrelationID;
93 }
94 
95 void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
96  uint64_t CorrelationID) {
97  (void)ObjHandle;
98  (void)AllocPtr;
99  (void)CorrelationID;
100 #ifdef XPTI_ENABLE_INSTRUMENTATION
101  if (xptiTraceEnabled()) {
102  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
103  0 /* guard zone */};
104 
105  xptiNotifySubscribers(
106  GMemAllocStreamID,
107  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end),
108  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
109  }
110 #endif
111 }
112 
113 static void waitForEvents(const std::vector<EventImplPtr> &Events) {
114  // Assuming all events will be on the same device or
115  // devices associated with the same Backend.
116  if (!Events.empty()) {
117  const detail::plugin &Plugin = Events[0]->getPlugin();
118  std::vector<RT::PiEvent> PiEvents(Events.size());
119  std::transform(Events.begin(), Events.end(), PiEvents.begin(),
120  [](const EventImplPtr &EventImpl) {
121  return EventImpl->getHandleRef();
122  });
123  Plugin.call<PiApiKind::piEventsWait>(PiEvents.size(), &PiEvents[0]);
124  }
125 }
126 
127 void memBufferCreateHelper(const plugin &Plugin, pi_context Ctx,
128  pi_mem_flags Flags, size_t Size, void *HostPtr,
129  pi_mem *RetMem, const pi_mem_properties *Props) {
130 #ifdef XPTI_ENABLE_INSTRUMENTATION
131  uint64_t CorrID = 0;
132 #endif
133  // We only want to instrument piMemBufferCreate
134  {
135 #ifdef XPTI_ENABLE_INSTRUMENTATION
136  CorrID =
137  emitMemAllocBeginTrace(0 /* mem object */, Size, 0 /* guard zone */);
138  xpti::utils::finally _{[&] {
139  // C-style cast is required for MSVC
140  uintptr_t MemObjID = (uintptr_t)(*RetMem);
141  pi_native_handle Ptr = 0;
142  // Always use call_nocheck here, because call may throw an exception,
143  // and this lambda will be called from destructor, which in combination
144  // rewards us with UB.
145  Plugin.call_nocheck<PiApiKind::piextMemGetNativeHandle>(*RetMem, &Ptr);
146  emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
147  CorrID);
148  }};
149 #endif
150  Plugin.call<PiApiKind::piMemBufferCreate>(Ctx, Flags, Size, HostPtr, RetMem,
151  Props);
152  }
153 }
154 
155 void memReleaseHelper(const plugin &Plugin, pi_mem Mem) {
156  // FIXME piMemRelease does not guarante memory release. It is only true if
157  // reference counter is 1. However, SYCL runtime currently only calls
158  // piMemRetain only for OpenCL interop
159 #ifdef XPTI_ENABLE_INSTRUMENTATION
160  uint64_t CorrID = 0;
161  // C-style cast is required for MSVC
162  uintptr_t MemObjID = (uintptr_t)(Mem);
163  uintptr_t Ptr = 0;
164  // Do not make unnecessary PI calls without instrumentation enabled
165  if (xptiTraceEnabled()) {
166  pi_native_handle PtrHandle = 0;
167  Plugin.call<PiApiKind::piextMemGetNativeHandle>(Mem, &PtrHandle);
168  Ptr = (uintptr_t)(PtrHandle);
169  }
170 #endif
171  // We only want to instrument piMemRelease
172  {
173 #ifdef XPTI_ENABLE_INSTRUMENTATION
174  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
175  xpti::utils::finally _{
176  [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }};
177 #endif
178  Plugin.call<PiApiKind::piMemRelease>(Mem);
179  }
180 }
181 
182 void memBufferMapHelper(const plugin &Plugin, pi_queue Queue, pi_mem Buffer,
183  pi_bool Blocking, pi_map_flags Flags, size_t Offset,
184  size_t Size, pi_uint32 NumEvents,
185  const pi_event *WaitList, pi_event *Event,
186  void **RetMap) {
187 #ifdef XPTI_ENABLE_INSTRUMENTATION
188  uint64_t CorrID = 0;
189  uintptr_t MemObjID = (uintptr_t)(Buffer);
190 #endif
191  // We only want to instrument piEnqueueMemBufferMap
192  {
193 #ifdef XPTI_ENABLE_INSTRUMENTATION
194  CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */);
195  xpti::utils::finally _{[&] {
196  emitMemAllocEndTrace(MemObjID, (uintptr_t)(*RetMap), Size,
197  0 /* guard zone */, CorrID);
198  }};
199 #endif
201  Queue, Buffer, Blocking, Flags, Offset, Size, NumEvents, WaitList,
202  Event, RetMap);
203  }
204 }
205 
206 void memUnmapHelper(const plugin &Plugin, pi_queue Queue, pi_mem Mem,
207  void *MappedPtr, pi_uint32 NumEvents,
208  const pi_event *WaitList, pi_event *Event) {
209 #ifdef XPTI_ENABLE_INSTRUMENTATION
210  uint64_t CorrID = 0;
211  uintptr_t MemObjID = (uintptr_t)(Mem);
212  uintptr_t Ptr = (uintptr_t)(MappedPtr);
213 #endif
214  // We only want to instrument piEnqueueMemUnmap
215  {
216 #ifdef XPTI_ENABLE_INSTRUMENTATION
217  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
218  xpti::utils::finally _{[&] {
219  // There's no way for SYCL to know, when the pointer is freed, so we have
220  // to explicitly wait for the end of data transfers here in order to
221  // report correct events.
222  // Always use call_nocheck here, because call may throw an exception,
223  // and this lambda will be called from destructor, which in combination
224  // rewards us with UB.
225  Plugin.call_nocheck<PiApiKind::piEventsWait>(1, Event);
226  emitMemReleaseEndTrace(MemObjID, Ptr, CorrID);
227  }};
228 #endif
229  Plugin.call<PiApiKind::piEnqueueMemUnmap>(Queue, Mem, MappedPtr, NumEvents,
230  WaitList, Event);
231  }
232 }
233 
234 void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
235  void *MemAllocation,
236  std::vector<EventImplPtr> DepEvents,
237  RT::PiEvent &OutEvent) {
238  // There is no async API for memory releasing. Explicitly wait for all
239  // dependency events and return empty event.
240  waitForEvents(DepEvents);
241  OutEvent = nullptr;
242  XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
243  MemObj->releaseMem(TargetContext, MemAllocation);
244 }
245 
246 void MemoryManager::releaseImageBuffer(ContextImplPtr TargetContext,
247  void *ImageBuf) {
248  (void)TargetContext;
249  (void)ImageBuf;
250  // TODO remove when ABI breaking changes are allowed.
251  throw runtime_error("Deprecated release operation", PI_INVALID_OPERATION);
252 }
253 
254 void MemoryManager::releaseMemObj(ContextImplPtr TargetContext,
255  SYCLMemObjI *MemObj, void *MemAllocation,
256  void *UserPtr) {
257  if (UserPtr == MemAllocation) {
258  // Do nothing as it's user provided memory.
259  return;
260  }
261 
262  if (TargetContext->is_host()) {
263  MemObj->releaseHostMem(MemAllocation);
264  return;
265  }
266 
267  const detail::plugin &Plugin = TargetContext->getPlugin();
268  memReleaseHelper(Plugin, pi::cast<RT::PiMem>(MemAllocation));
269 }
270 
271 void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
272  bool InitFromUserData, void *HostPtr,
273  std::vector<EventImplPtr> DepEvents,
274  RT::PiEvent &OutEvent) {
275  // There is no async API for memory allocation. Explicitly wait for all
276  // dependency events and return empty event.
277  waitForEvents(DepEvents);
278  OutEvent = nullptr;
279 
280  return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
281  OutEvent);
282 }
283 
284 void *MemoryManager::wrapIntoImageBuffer(ContextImplPtr TargetContext,
285  void *MemBuf, SYCLMemObjI *MemObj) {
286  (void)TargetContext;
287  (void)MemBuf;
288  (void)MemObj;
289  // TODO remove when ABI breaking changes are allowed.
290  throw runtime_error("Deprecated allocation operation", PI_INVALID_OPERATION);
291 }
292 
293 void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
294  bool HostPtrReadOnly, size_t Size,
295  const sycl::property_list &) {
296  // Can return user pointer directly if it points to writable memory.
297  if (UserPtr && HostPtrReadOnly == false)
298  return UserPtr;
299 
300  void *NewMem = MemObj->allocateHostMem();
301  // Need to initialize new memory if user provides pointer to read only
302  // memory.
303  if (UserPtr && HostPtrReadOnly == true)
304  std::memcpy((char *)NewMem, (char *)UserPtr, Size);
305  return NewMem;
306 }
307 
308 void *MemoryManager::allocateInteropMemObject(
309  ContextImplPtr TargetContext, void *UserPtr,
310  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
311  const sycl::property_list &, RT::PiEvent &OutEventToWait) {
312  (void)TargetContext;
313  (void)InteropContext;
314  // If memory object is created with interop c'tor return cl_mem as is.
315  assert(TargetContext == InteropContext && "Expected matching contexts");
316  OutEventToWait = InteropEvent->getHandleRef();
317  // Retain the event since it will be released during alloca command
318  // destruction
319  if (nullptr != OutEventToWait) {
320  const detail::plugin &Plugin = InteropEvent->getPlugin();
321  Plugin.call<PiApiKind::piEventRetain>(OutEventToWait);
322  }
323  return UserPtr;
324 }
325 
327  bool HostPtrReadOnly) {
328  // Create read_write mem object to handle arbitrary uses.
329  RT::PiMemFlags Result =
331  if (UserPtr)
332  Result |= HostPtrReadOnly ? PI_MEM_FLAGS_HOST_PTR_COPY
334  return Result;
335 }
336 
337 void *MemoryManager::allocateImageObject(ContextImplPtr TargetContext,
338  void *UserPtr, bool HostPtrReadOnly,
339  const RT::PiMemImageDesc &Desc,
340  const RT::PiMemImageFormat &Format,
341  const sycl::property_list &) {
342  RT::PiMemFlags CreationFlags =
343  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
344 
345  RT::PiMem NewMem;
346  const detail::plugin &Plugin = TargetContext->getPlugin();
347  Plugin.call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(),
348  CreationFlags, &Format, &Desc,
349  UserPtr, &NewMem);
350  return NewMem;
351 }
352 
353 void *
354 MemoryManager::allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr,
355  bool HostPtrReadOnly, const size_t Size,
356  const sycl::property_list &PropsList) {
357  RT::PiMemFlags CreationFlags =
358  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
359  if (PropsList.has_property<
361  CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC;
362 
363  RT::PiMem NewMem = nullptr;
364  const detail::plugin &Plugin = TargetContext->getPlugin();
365 
367  if (TargetContext->isBufferLocationSupported()) {
368  auto location =
370  .get_buffer_location();
372  location, 0};
373  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(),
374  CreationFlags, Size, UserPtr, &NewMem, props);
375  return NewMem;
376  }
377  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags,
378  Size, UserPtr, &NewMem, nullptr);
379  return NewMem;
380 }
381 
382 void *MemoryManager::allocateMemBuffer(ContextImplPtr TargetContext,
383  SYCLMemObjI *MemObj, void *UserPtr,
384  bool HostPtrReadOnly, size_t Size,
385  const EventImplPtr &InteropEvent,
386  const ContextImplPtr &InteropContext,
387  const sycl::property_list &PropsList,
388  RT::PiEvent &OutEventToWait) {
389  void *MemPtr;
390  if (TargetContext->is_host())
391  MemPtr =
392  allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
393  else if (UserPtr && InteropContext)
394  MemPtr =
395  allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
396  InteropContext, PropsList, OutEventToWait);
397  else
398  MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
399  PropsList);
400  XPTIRegistry::bufferAssociateNotification(MemObj, MemPtr);
401  return MemPtr;
402 }
403 
404 void *MemoryManager::allocateMemImage(
405  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
406  bool HostPtrReadOnly, size_t Size, const RT::PiMemImageDesc &Desc,
407  const RT::PiMemImageFormat &Format, const EventImplPtr &InteropEvent,
408  const ContextImplPtr &InteropContext, const sycl::property_list &PropsList,
409  RT::PiEvent &OutEventToWait) {
410  if (TargetContext->is_host())
411  return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
412  PropsList);
413  if (UserPtr && InteropContext)
414  return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
415  InteropContext, PropsList, OutEventToWait);
416  return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
417  Format, PropsList);
418 }
419 
420 void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
421  void *ParentMemObj, size_t ElemSize,
422  size_t Offset, range<3> Range,
423  std::vector<EventImplPtr> DepEvents,
424  RT::PiEvent &OutEvent) {
425  waitForEvents(DepEvents);
426  OutEvent = nullptr;
427 
428  if (TargetContext->is_host())
429  return static_cast<void *>(static_cast<char *>(ParentMemObj) + Offset);
430 
431  size_t SizeInBytes = ElemSize;
432  for (size_t I = 0; I < 3; ++I)
433  SizeInBytes *= Range[I];
434 
435  RT::PiResult Error = PI_SUCCESS;
436  pi_buffer_region_struct Region{Offset, SizeInBytes};
437  RT::PiMem NewMem;
438  const detail::plugin &Plugin = TargetContext->getPlugin();
440  pi::cast<RT::PiMem>(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW,
441  PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem);
442  if (Error == PI_MISALIGNED_SUB_BUFFER_OFFSET)
443  throw invalid_object_error(
444  "Specified offset of the sub-buffer being constructed is not a "
445  "multiple of the memory base address alignment",
447 
448  if (Error != PI_SUCCESS) {
449  Plugin.reportPiError(Error, "allocateMemSubBuffer()");
450  }
451 
452  return NewMem;
453 }
454 
456  int XTerm;
457  int YTerm;
458  int ZTerm;
459 };
462  // For buffers, the offsets/ranges coming from accessor are always
463  // id<3>/range<3> But their organization varies by dimension:
464  // 1 ==> {width, 1, 1}
465  // 2 ==> {height, width, 1}
466  // 3 ==> {depth, height, width}
467  // Some callers schedule 0 as DimDst/DimSrc.
468 
469  if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
470  if (Dimensions == 3) {
471  pos.XTerm = 2, pos.YTerm = 1, pos.ZTerm = 0;
472  } else if (Dimensions == 2) {
473  pos.XTerm = 1, pos.YTerm = 0, pos.ZTerm = 2;
474  } else { // Dimension is 1 or 0
475  pos.XTerm = 0, pos.YTerm = 1, pos.ZTerm = 2;
476  }
477  } else { // While range<>/id<> use by images is different than buffers, it's
478  // consistent with their accessors.
479  pos.XTerm = 0;
480  pos.YTerm = 1;
481  pos.ZTerm = 2;
482  }
483 }
484 
485 void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
486  unsigned int DimSrc, sycl::range<3> SrcSize,
487  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
488  unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr TgtQueue,
489  unsigned int DimDst, sycl::range<3> DstSize,
490  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
491  unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
492  RT::PiEvent &OutEvent) {
493  (void)SrcAccessRange;
494  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
495 
496  const RT::PiQueue Queue = TgtQueue->getHandleRef();
497  const detail::plugin &Plugin = TgtQueue->getPlugin();
498 
499  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
500  TermPositions SrcPos, DstPos;
501  prepTermPositions(SrcPos, DimSrc, MemType);
502  prepTermPositions(DstPos, DimDst, MemType);
503 
504  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
505  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
506  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
507  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
508  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
509 
510  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
511  if (1 == DimDst && 1 == DimSrc) {
513  Queue, DstMem,
514  /*blocking_write=*/CL_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
515  SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
516  } else {
517  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
518  size_t BufferSlicePitch =
519  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
520  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
521  size_t HostSlicePitch =
522  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
523 
524  pi_buff_rect_offset_struct BufferOffset{
525  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
526  pi_buff_rect_offset_struct HostOffset{
527  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
528  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
529  DstAccessRange[DstPos.YTerm],
530  DstAccessRange[DstPos.ZTerm]};
531 
533  Queue, DstMem,
534  /*blocking_write=*/CL_FALSE, &BufferOffset, &HostOffset, &RectRegion,
535  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
536  SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
537  }
538  } else {
539  size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
540  size_t InputSlicePitch =
541  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
542 
543  pi_image_offset_struct Origin{DstOffset[DstPos.XTerm],
544  DstOffset[DstPos.YTerm],
545  DstOffset[DstPos.ZTerm]};
546  pi_image_region_struct Region{DstAccessRange[DstPos.XTerm],
547  DstAccessRange[DstPos.YTerm],
548  DstAccessRange[DstPos.ZTerm]};
549 
551  Queue, DstMem,
552  /*blocking_write=*/CL_FALSE, &Origin, &Region, InputRowPitch,
553  InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
554  }
555 }
556 
557 void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
558  unsigned int DimSrc, sycl::range<3> SrcSize,
559  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
560  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
561  unsigned int DimDst, sycl::range<3> DstSize,
562  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
563  unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
564  RT::PiEvent &OutEvent) {
565  (void)DstAccessRange;
566  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
567 
568  const RT::PiQueue Queue = SrcQueue->getHandleRef();
569  const detail::plugin &Plugin = SrcQueue->getPlugin();
570 
571  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
572  TermPositions SrcPos, DstPos;
573  prepTermPositions(SrcPos, DimSrc, MemType);
574  prepTermPositions(DstPos, DimDst, MemType);
575 
576  // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
577  // copyD2D) will usually have the same values for AccessRange, Size,
578  // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
579  // Essentially, it schedules a copyBack of chars thus in copyD2H the
580  // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
581  // sized to bytes with a DstElemSize of 1.
582  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
583  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
584  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
585  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
586  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
587 
588  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
589  if (1 == DimDst && 1 == DimSrc) {
591  Queue, SrcMem,
592  /*blocking_read=*/CL_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
593  DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
594  } else {
595  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
596  size_t BufferSlicePitch =
597  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
598  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
599  size_t HostSlicePitch =
600  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
601 
602  pi_buff_rect_offset_struct BufferOffset{
603  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
604  pi_buff_rect_offset_struct HostOffset{
605  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
606  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
607  SrcAccessRange[SrcPos.YTerm],
608  SrcAccessRange[SrcPos.ZTerm]};
609 
611  Queue, SrcMem,
612  /*blocking_read=*/CL_FALSE, &BufferOffset, &HostOffset, &RectRegion,
613  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
614  DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
615  }
616  } else {
617  size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
618  size_t SlicePitch =
619  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
620 
621  pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm],
622  SrcOffset[SrcPos.YTerm],
623  SrcOffset[SrcPos.ZTerm]};
624  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
625  SrcAccessRange[SrcPos.YTerm],
626  SrcAccessRange[SrcPos.ZTerm]};
627 
629  Queue, SrcMem, CL_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
630  DepEvents.size(), DepEvents.data(), &OutEvent);
631  }
632 }
633 
634 void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
635  unsigned int DimSrc, sycl::range<3> SrcSize,
636  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
637  unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr,
638  unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3>,
639  sycl::id<3> DstOffset, unsigned int DstElemSize,
640  std::vector<RT::PiEvent> DepEvents, RT::PiEvent &OutEvent) {
641  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
642 
643  const RT::PiQueue Queue = SrcQueue->getHandleRef();
644  const detail::plugin &Plugin = SrcQueue->getPlugin();
645 
646  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
647  TermPositions SrcPos, DstPos;
648  prepTermPositions(SrcPos, DimSrc, MemType);
649  prepTermPositions(DstPos, DimDst, MemType);
650 
651  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
652  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
653  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
654  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
655  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
656 
657  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
658  if (1 == DimDst && 1 == DimSrc) {
660  Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
661  SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
662  &OutEvent);
663  } else {
664  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
665  // calculate both src and dest pitch using region[0], which is not correct
666  // if src and dest are not the same size.
667  size_t SrcRowPitch = SrcSzWidthBytes;
668  size_t SrcSlicePitch = (DimSrc <= 1)
669  ? SrcSzWidthBytes
670  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
671  size_t DstRowPitch = DstSzWidthBytes;
672  size_t DstSlicePitch = (DimDst <= 1)
673  ? DstSzWidthBytes
674  : DstSzWidthBytes * DstSize[DstPos.YTerm];
675 
676  pi_buff_rect_offset_struct SrcOrigin{
677  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
678  pi_buff_rect_offset_struct DstOrigin{
679  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
680  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
681  SrcAccessRange[SrcPos.YTerm],
682  SrcAccessRange[SrcPos.ZTerm]};
683 
685  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
686  SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
687  DepEvents.data(), &OutEvent);
688  }
689  } else {
690  pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm],
691  SrcOffset[SrcPos.YTerm],
692  SrcOffset[SrcPos.ZTerm]};
693  pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm],
694  DstOffset[DstPos.YTerm],
695  DstOffset[DstPos.ZTerm]};
696  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
697  SrcAccessRange[SrcPos.YTerm],
698  SrcAccessRange[SrcPos.ZTerm]};
699 
701  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
702  DepEvents.size(), DepEvents.data(), &OutEvent);
703  }
704 }
705 
706 static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr,
707  unsigned int DimSrc, sycl::range<3> SrcSize,
708  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
709  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
710  unsigned int DimDst, sycl::range<3> DstSize,
711  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
712  unsigned int DstElemSize, std::vector<RT::PiEvent>,
713  RT::PiEvent &) {
714  if ((DimSrc != 1 || DimDst != 1) &&
715  (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} ||
716  SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
717  throw runtime_error("Not supported configuration of memcpy requested",
719  }
720 
721  SrcMem += SrcOffset[0] * SrcElemSize;
722  DstMem += DstOffset[0] * DstElemSize;
723 
724  if (SrcMem == DstMem)
725  return;
726 
727  size_t BytesToCopy =
728  SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
729  std::memcpy(DstMem, SrcMem, BytesToCopy);
730 }
731 
732 // Copies memory between: host and device, host and host,
733 // device and device if memory objects bound to the one context.
734 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
735  QueueImplPtr SrcQueue, unsigned int DimSrc,
736  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
737  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
738  void *DstMem, QueueImplPtr TgtQueue,
739  unsigned int DimDst, sycl::range<3> DstSize,
740  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
741  unsigned int DstElemSize,
742  std::vector<RT::PiEvent> DepEvents,
743  RT::PiEvent &OutEvent) {
744 
745  if (SrcQueue->is_host()) {
746  if (TgtQueue->is_host())
747  copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
748  SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem,
749  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
750  DstElemSize, std::move(DepEvents), OutEvent);
751 
752  else
753  copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
754  SrcAccessRange, SrcOffset, SrcElemSize,
755  pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
756  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
757  OutEvent);
758  } else {
759  if (TgtQueue->is_host())
760  copyD2H(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
761  DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
762  (char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
763  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
764  OutEvent);
765  else
766  copyD2D(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
767  DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
768  pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
769  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
770  OutEvent);
771  }
772 }
773 
774 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
775  size_t PatternSize, const char *Pattern,
776  unsigned int Dim, sycl::range<3>, sycl::range<3> Range,
777  sycl::id<3> Offset, unsigned int ElementSize,
778  std::vector<RT::PiEvent> DepEvents,
779  RT::PiEvent &OutEvent) {
780  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
781 
782  const detail::plugin &Plugin = Queue->getPlugin();
783  if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
784  if (Dim == 1) {
786  Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, PatternSize,
787  Offset[0] * ElementSize, Range[0] * ElementSize, DepEvents.size(),
788  DepEvents.data(), &OutEvent);
789  return;
790  }
791  throw runtime_error("Not supported configuration of fill requested",
793  } else {
795  Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, &Offset[0],
796  &Range[0], DepEvents.size(), DepEvents.data(), &OutEvent);
797  }
798 }
799 
800 void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
801  access::mode AccessMode, unsigned int, sycl::range<3>,
802  sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
803  unsigned int ElementSize,
804  std::vector<RT::PiEvent> DepEvents,
805  RT::PiEvent &OutEvent) {
806  if (Queue->is_host()) {
807  throw runtime_error("Not supported configuration of map requested",
809  }
810 
811  pi_map_flags Flags = 0;
812 
813  switch (AccessMode) {
814  case access::mode::read:
815  Flags |= PI_MAP_READ;
816  break;
817  case access::mode::write:
818  Flags |= PI_MAP_WRITE;
819  break;
820  case access::mode::read_write:
821  case access::mode::atomic:
822  Flags = PI_MAP_WRITE | PI_MAP_READ;
823  break;
824  case access::mode::discard_write:
825  case access::mode::discard_read_write:
827  break;
828  }
829 
830  AccessOffset[0] *= ElementSize;
831  AccessRange[0] *= ElementSize;
832 
833  // TODO: Handle offset
834  assert(AccessOffset[0] == 0 && "Handle offset");
835 
836  void *MappedPtr = nullptr;
837  const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
838  const detail::plugin &Plugin = Queue->getPlugin();
839  memBufferMapHelper(Plugin, Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem),
840  CL_FALSE, Flags, AccessOffset[0], BytesToMap,
841  DepEvents.size(), DepEvents.data(), &OutEvent, &MappedPtr);
842  return MappedPtr;
843 }
844 
845 void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
846  void *MappedPtr, std::vector<RT::PiEvent> DepEvents,
847  RT::PiEvent &OutEvent) {
848 
849  // Host queue is not supported here.
850  // All DepEvents are to the same Context.
851  // Using the plugin of the Queue.
852 
853  const detail::plugin &Plugin = Queue->getPlugin();
854  memUnmapHelper(Plugin, Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem),
855  MappedPtr, DepEvents.size(), DepEvents.data(), &OutEvent);
856 }
857 
858 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
859  size_t Len, void *DstMem,
860  std::vector<RT::PiEvent> DepEvents,
861  RT::PiEvent *OutEvent) {
862  sycl::context Context = SrcQueue->get_context();
863 
864  if (!Len) { // no-op, but ensure DepEvents will still be waited on
865  if (!Context.is_host() && !DepEvents.empty()) {
866  SrcQueue->getPlugin().call<PiApiKind::piEnqueueEventsWait>(
867  SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
868  OutEvent);
869  }
870  return;
871  }
872 
873  if (!SrcMem || !DstMem)
874  throw runtime_error("NULL pointer argument in memory copy operation.",
876 
877  if (Context.is_host()) {
878  std::memcpy(DstMem, SrcMem, Len);
879  } else {
880  const detail::plugin &Plugin = SrcQueue->getPlugin();
881  Plugin.call<PiApiKind::piextUSMEnqueueMemcpy>(SrcQueue->getHandleRef(),
882  /* blocking */ false, DstMem,
883  SrcMem, Len, DepEvents.size(),
884  DepEvents.data(), OutEvent);
885  }
886 }
887 
888 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
889  int Pattern, std::vector<RT::PiEvent> DepEvents,
890  RT::PiEvent *OutEvent) {
891  sycl::context Context = Queue->get_context();
892 
893  if (!Length) { // no-op, but ensure DepEvents will still be waited on
894  if (!Context.is_host() && !DepEvents.empty()) {
895  Queue->getPlugin().call<PiApiKind::piEnqueueEventsWait>(
896  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
897  }
898  return;
899  }
900 
901  if (!Mem)
902  throw runtime_error("NULL pointer argument in memory fill operation.",
904 
905  if (Context.is_host()) {
906  std::memset(Mem, Pattern, Length);
907  } else {
908  const detail::plugin &Plugin = Queue->getPlugin();
910  Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
911  DepEvents.data(), OutEvent);
912  }
913 }
914 
915 void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length,
916  std::vector<RT::PiEvent> DepEvents,
917  RT::PiEvent *OutEvent) {
918  sycl::context Context = Queue->get_context();
919 
920  if (Context.is_host()) {
921  // TODO: Potentially implement prefetch on the host.
922  } else {
923  const detail::plugin &Plugin = Queue->getPlugin();
925  Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0),
926  DepEvents.size(), DepEvents.data(), OutEvent);
927  }
928 }
929 
930 void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue,
931  size_t Length, pi_mem_advice Advice,
932  std::vector<RT::PiEvent> /*DepEvents*/,
933  RT::PiEvent *OutEvent) {
934  sycl::context Context = Queue->get_context();
935 
936  if (!Context.is_host()) {
937  const detail::plugin &Plugin = Queue->getPlugin();
938  Plugin.call<PiApiKind::piextUSMEnqueueMemAdvise>(Queue->getHandleRef(), Mem,
939  Length, Advice, OutEvent);
940  }
941 }
942 
943 // TODO: Delete this function when ABI breaking changes are allowed.
944 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len,
945  void *DstMem, std::vector<RT::PiEvent> DepEvents,
946  RT::PiEvent &OutEvent) {
947  copy_usm(SrcMem, Queue, Len, DstMem, DepEvents, &OutEvent);
948 }
949 
950 // TODO: Delete this function when ABI breaking changes are allowed.
951 void MemoryManager::fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len,
952  int Pattern, std::vector<RT::PiEvent> DepEvents,
953  RT::PiEvent &OutEvent) {
954  fill_usm(DstMem, Queue, Len, Pattern, DepEvents, &OutEvent);
955 }
956 
957 // TODO: Delete this function when ABI breaking changes are allowed.
958 void MemoryManager::prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len,
959  std::vector<RT::PiEvent> DepEvents,
960  RT::PiEvent &OutEvent) {
961  prefetch_usm(Ptr, Queue, Len, DepEvents, &OutEvent);
962 }
963 
964 // TODO: Delete this function when ABI breaking changes are allowed.
965 void MemoryManager::advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len,
966  pi_mem_advice Advice,
967  std::vector<RT::PiEvent> DepEvents,
968  RT::PiEvent &OutEvent) {
969  advise_usm(Ptr, Queue, Len, Advice, DepEvents, &OutEvent);
970 }
971 
972 } // namespace detail
973 } // namespace sycl
974 } // __SYCL_INLINE_NAMESPACE(cl)
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:207
PI_MAP_WRITE
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:594
event_impl.hpp
PI_SUCCESS
@ PI_SUCCESS
Definition: pi.h:86
mem_alloc_helper.hpp
pi_buff_rect_offset_struct
Definition: pi.h:855
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:32
cl::sycl::detail::getMemObjCreationFlags
static RT::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
Definition: memory_manager.cpp:326
pi_bool
pi_uint32 pi_bool
Definition: pi.h:74
context_impl.hpp
cl::sycl::detail::SYCLMemObjI::releaseHostMem
virtual void releaseHostMem(void *Ptr)=0
PI_INVALID_OPERATION
@ PI_INVALID_OPERATION
Definition: pi.h:88
cl::sycl::detail::memUnmapHelper
void memUnmapHelper(const plugin &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: memory_manager.cpp:206
PI_MEM_FLAGS_HOST_PTR_COPY
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY
Definition: pi.h:588
cl::sycl::id< 3 >
xpti_registry.hpp
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:459
cl::sycl::detail::TermPositions::XTerm
int XTerm
Definition: memory_manager.cpp:456
piEnqueueMemBufferCopy
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1570
_pi_result
_pi_result
Definition: pi.h:85
piEnqueueMemUnmap
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1636
cl::sycl::detail::copyD2D
void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
Definition: memory_manager.cpp:634
cl::sycl::property_list::has_property
bool has_property() const
Definition: property_list.hpp:50
cl::sycl::detail::copyH2H
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent >, RT::PiEvent &)
Definition: memory_manager.cpp:706
cl::sycl::detail::pi::PiMemFlags
::pi_mem_flags PiMemFlags
Definition: pi.hpp:112
sycl
Definition: invoke_simd.hpp:68
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
piEnqueueMemImageFill
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1747
queue_impl.hpp
cl::sycl::detail::memBufferMapHelper
void memBufferMapHelper(const plugin &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: memory_manager.cpp:182
cl::sycl::detail::copyH2D
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
Definition: memory_manager.cpp:485
cl::sycl::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
cl::sycl::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:110
cl::sycl::range< 3 >
cl::sycl::detail::SYCLMemObjI::getType
virtual MemObjType getType() const =0
cl::sycl::detail::SYCLMemObjI::MemObjType
MemObjType
Definition: sycl_mem_obj_i.hpp:32
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
cl::sycl::range::size
size_t size() const
Definition: range.hpp:50
cl::sycl::detail::prepTermPositions
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
Definition: memory_manager.cpp:460
PI_MEM_ACCESS_READ_ONLY
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
Definition: pi.h:585
piextUSMEnqueueMemcpy
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_esimd_emulator.cpp:1922
piMemBufferPartition
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_esimd_emulator.cpp:1753
piextUSMEnqueueMemAdvise
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_esimd_emulator.cpp:1927
cl::sycl::detail::emitMemAllocBeginTrace
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
Definition: memory_manager.cpp:35
piEventsWait
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_esimd_emulator.cpp:1407
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:378
PI_MAP_READ
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:593
pi_uint32
uint32_t pi_uint32
Definition: pi.h:72
piEnqueueMemBufferReadRect
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1548
pi_buff_rect_region_struct
Definition: pi.h:864
piMemRelease
pi_result piMemRelease(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1093
piEnqueueMemBufferCopyRect
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1576
piEnqueueMemBufferWrite
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1556
PI_MISALIGNED_SUB_BUFFER_OFFSET
@ PI_MISALIGNED_SUB_BUFFER_OFFSET
Definition: pi.h:106
pi_mem_flags
pi_bitfield pi_mem_flags
Definition: pi.h:582
piEnqueueMemImageRead
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1678
cl::sycl::context::is_host
bool is_host() const
Gets OpenCL interoperability context.
Definition: context.cpp:119
cl::sycl::detail::EventImplPtr
std::shared_ptr< detail::event_impl > EventImplPtr
Definition: memory_manager.hpp:31
cl::sycl::detail::TermPositions::YTerm
int YTerm
Definition: memory_manager.cpp:457
cl::sycl::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
cl::sycl::detail::emitMemReleaseBeginTrace
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
Definition: memory_manager.cpp:76
cl::sycl::detail::copyD2H
void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
Definition: memory_manager.cpp:557
piEnqueueMemBufferFill
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1584
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
PI_MEM_FLAGS_HOST_PTR_USE
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:587
cl::sycl::detail::waitForEvents
static void waitForEvents(const std::vector< EventImplPtr > &Events)
Definition: memory_manager.cpp:113
PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
Definition: pi.h:602
pi_mem_properties
pi_bitfield pi_mem_properties
Definition: pi.h:600
cl::sycl::detail::memReleaseHelper
void memReleaseHelper(const plugin &Plugin, pi_mem Mem)
Definition: memory_manager.cpp:155
cl::sycl::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
piextUSMEnqueueMemset
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_esimd_emulator.cpp:1917
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:76
PI_MEM_FLAGS_ACCESS_RW
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:584
cl::sycl::property_list::get_property
PropT get_property() const
Definition: property_list.hpp:42
piMemImageCreate
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_esimd_emulator.cpp:1158
_pi_image_format
Definition: pi.h:931
cl::sycl::detail::QueueImplPtr
std::shared_ptr< detail::queue_impl > QueueImplPtr
Definition: memory_manager.hpp:30
cl::sycl::detail::SYCLMemObjI::allocateHostMem
virtual void * allocateHostMem()=0
piEnqueueMemBufferMap
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_esimd_emulator.cpp:1590
cl::sycl::detail::SYCLMemObjI::allocateMem
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, RT::PiEvent &InteropEvent)=0
piextMemGetNativeHandle
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_esimd_emulator.cpp:1277
piEnqueueMemBufferRead
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1495
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:91
cl::sycl::property::buffer::detail::buffer_location
Definition: buffer_properties.hpp:55
piEnqueueEventsWait
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1485
piextUSMEnqueuePrefetch
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_esimd_emulator.cpp:1963
PI_MEM_FLAGS_HOST_PTR_ALLOC
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
Definition: pi.h:589
pi_buffer_region_struct
Definition: pi.h:847
_pi_image_desc
Definition: pi.h:936
piEventRetain
pi_result piEventRetain(pi_event event)
Definition: pi_esimd_emulator.cpp:1432
_pi_event
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:460
piMemBufferCreate
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
Definition: pi_esimd_emulator.cpp:1007
PI_MAP_WRITE_INVALIDATE_REGION
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
Definition: pi.h:595
PI_BUFFER_CREATE_TYPE_REGION
@ PI_BUFFER_CREATE_TYPE_REGION
Definition: pi.h:511
piEnqueueMemBufferWriteRect
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1562
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
pi_image_offset_struct
Definition: pi.h:873
pi_image_region_struct
Definition: pi.h:882
_pi_usm_migration_flags
_pi_usm_migration_flags
Definition: pi.h:1658
cl::sycl::detail::TermPositions::ZTerm
int ZTerm
Definition: memory_manager.cpp:458
cl::sycl::access::mode
mode
Definition: access.hpp:28
cl::sycl::detail::TermPositions
Definition: memory_manager.cpp:455
piEnqueueMemImageCopy
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1741
cl::sycl::ext::oneapi::property::buffer::use_pinned_host_memory
Definition: buffer_properties.hpp:74
cl::sycl::detail::emitMemReleaseEndTrace
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
Definition: memory_manager.cpp:95
cl::sycl::detail::plugin::reportPiError
void reportPiError(RT::PiResult pi_result, const char *context) const
Definition: plugin.hpp:149
piEnqueueMemImageWrite
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1735
pi_map_flags
pi_bitfield pi_map_flags
Definition: pi.h:592
cl::sycl::Dimensions
Dimensions
Definition: backend.hpp:138
cl::sycl::detail::SYCLMemObjI
Definition: sycl_mem_obj_i.hpp:28
cl::sycl::detail::memBufferCreateHelper
void memBufferCreateHelper(const plugin &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
Definition: memory_manager.cpp:127
_pi_context
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:150
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12
cl::sycl::detail::emitMemAllocEndTrace
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
Definition: memory_manager.cpp:56
cl::sycl::detail::SYCLMemObjI::releaseMem
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
memory_manager.hpp