DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_manager.cpp
Go to the documentation of this file.
1 //==-------------- memory_manager.cpp --------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <detail/event_impl.hpp>
13 #include <detail/queue_impl.hpp>
14 #include <detail/xpti_registry.hpp>
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cstring>
19 #include <vector>
20 
21 #ifdef XPTI_ENABLE_INSTRUMENTATION
22 #include <xpti/xpti_data_types.h>
23 #include <xpti/xpti_trace_framework.hpp>
24 #endif
25 
26 namespace sycl {
28 namespace detail {
29 
30 #ifdef XPTI_ENABLE_INSTRUMENTATION
31 uint8_t GMemAllocStreamID;
32 xpti::trace_event_data_t *GMemAllocEvent;
33 #endif
34 
35 uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize,
36  size_t GuardZone) {
37  (void)ObjHandle;
38  (void)AllocSize;
39  (void)GuardZone;
40  uint64_t CorrelationID = 0;
41 #ifdef XPTI_ENABLE_INSTRUMENTATION
42  if (xptiTraceEnabled()) {
43  xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 /* alloc ptr */, AllocSize,
44  GuardZone};
45 
46  CorrelationID = xptiGetUniqueId();
47  xptiNotifySubscribers(
48  GMemAllocStreamID,
49  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_begin),
50  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
51  }
52 #endif
53  return CorrelationID;
54 }
55 
56 void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
57  size_t AllocSize, size_t GuardZone,
58  uint64_t CorrelationID) {
59  (void)ObjHandle;
60  (void)AllocPtr;
61  (void)AllocSize;
62  (void)GuardZone;
63  (void)CorrelationID;
64 #ifdef XPTI_ENABLE_INSTRUMENTATION
65  if (xptiTraceEnabled()) {
66  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
67 
68  xptiNotifySubscribers(
69  GMemAllocStreamID,
70  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_end),
71  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
72  }
73 #endif
74 }
75 
76 uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr) {
77  (void)ObjHandle;
78  (void)AllocPtr;
79  uint64_t CorrelationID = 0;
80 #ifdef XPTI_ENABLE_INSTRUMENTATION
81  if (xptiTraceEnabled()) {
82  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
83  0 /* guard zone */};
84 
85  CorrelationID = xptiGetUniqueId();
86  xptiNotifySubscribers(
87  GMemAllocStreamID,
88  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_begin),
89  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
90  }
91 #endif
92  return CorrelationID;
93 }
94 
95 void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
96  uint64_t CorrelationID) {
97  (void)ObjHandle;
98  (void)AllocPtr;
99  (void)CorrelationID;
100 #ifdef XPTI_ENABLE_INSTRUMENTATION
101  if (xptiTraceEnabled()) {
102  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
103  0 /* guard zone */};
104 
105  xptiNotifySubscribers(
106  GMemAllocStreamID,
107  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end),
108  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
109  }
110 #endif
111 }
112 
113 static void waitForEvents(const std::vector<EventImplPtr> &Events) {
114  // Assuming all events will be on the same device or
115  // devices associated with the same Backend.
116  if (!Events.empty()) {
117  const detail::plugin &Plugin = Events[0]->getPlugin();
118  std::vector<RT::PiEvent> PiEvents(Events.size());
119  std::transform(Events.begin(), Events.end(), PiEvents.begin(),
120  [](const EventImplPtr &EventImpl) {
121  return EventImpl->getHandleRef();
122  });
123  Plugin.call<PiApiKind::piEventsWait>(PiEvents.size(), &PiEvents[0]);
124  }
125 }
126 
127 void memBufferCreateHelper(const plugin &Plugin, pi_context Ctx,
128  pi_mem_flags Flags, size_t Size, void *HostPtr,
129  pi_mem *RetMem, const pi_mem_properties *Props) {
130 #ifdef XPTI_ENABLE_INSTRUMENTATION
131  uint64_t CorrID = 0;
132 #endif
133  // We only want to instrument piMemBufferCreate
134  {
135 #ifdef XPTI_ENABLE_INSTRUMENTATION
136  CorrID =
137  emitMemAllocBeginTrace(0 /* mem object */, Size, 0 /* guard zone */);
138  xpti::utils::finally _{[&] {
139  // C-style cast is required for MSVC
140  uintptr_t MemObjID = (uintptr_t)(*RetMem);
141  pi_native_handle Ptr = 0;
142  // Always use call_nocheck here, because call may throw an exception,
143  // and this lambda will be called from destructor, which in combination
144  // rewards us with UB.
145  Plugin.call_nocheck<PiApiKind::piextMemGetNativeHandle>(*RetMem, &Ptr);
146  emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
147  CorrID);
148  }};
149 #endif
150  Plugin.call<PiApiKind::piMemBufferCreate>(Ctx, Flags, Size, HostPtr, RetMem,
151  Props);
152  }
153 }
154 
155 void memReleaseHelper(const plugin &Plugin, pi_mem Mem) {
156  // FIXME piMemRelease does not guarante memory release. It is only true if
157  // reference counter is 1. However, SYCL runtime currently only calls
158  // piMemRetain only for OpenCL interop
159 #ifdef XPTI_ENABLE_INSTRUMENTATION
160  uint64_t CorrID = 0;
161  // C-style cast is required for MSVC
162  uintptr_t MemObjID = (uintptr_t)(Mem);
163  uintptr_t Ptr = 0;
164  // Do not make unnecessary PI calls without instrumentation enabled
165  if (xptiTraceEnabled()) {
166  pi_native_handle PtrHandle = 0;
167  Plugin.call<PiApiKind::piextMemGetNativeHandle>(Mem, &PtrHandle);
168  Ptr = (uintptr_t)(PtrHandle);
169  }
170 #endif
171  // We only want to instrument piMemRelease
172  {
173 #ifdef XPTI_ENABLE_INSTRUMENTATION
174  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
175  xpti::utils::finally _{
176  [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }};
177 #endif
178  Plugin.call<PiApiKind::piMemRelease>(Mem);
179  }
180 }
181 
182 void memBufferMapHelper(const plugin &Plugin, pi_queue Queue, pi_mem Buffer,
183  pi_bool Blocking, pi_map_flags Flags, size_t Offset,
184  size_t Size, pi_uint32 NumEvents,
185  const pi_event *WaitList, pi_event *Event,
186  void **RetMap) {
187 #ifdef XPTI_ENABLE_INSTRUMENTATION
188  uint64_t CorrID = 0;
189  uintptr_t MemObjID = (uintptr_t)(Buffer);
190 #endif
191  // We only want to instrument piEnqueueMemBufferMap
192  {
193 #ifdef XPTI_ENABLE_INSTRUMENTATION
194  CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */);
195  xpti::utils::finally _{[&] {
196  emitMemAllocEndTrace(MemObjID, (uintptr_t)(*RetMap), Size,
197  0 /* guard zone */, CorrID);
198  }};
199 #endif
201  Queue, Buffer, Blocking, Flags, Offset, Size, NumEvents, WaitList,
202  Event, RetMap);
203  }
204 }
205 
206 void memUnmapHelper(const plugin &Plugin, pi_queue Queue, pi_mem Mem,
207  void *MappedPtr, pi_uint32 NumEvents,
208  const pi_event *WaitList, pi_event *Event) {
209 #ifdef XPTI_ENABLE_INSTRUMENTATION
210  uint64_t CorrID = 0;
211  uintptr_t MemObjID = (uintptr_t)(Mem);
212  uintptr_t Ptr = (uintptr_t)(MappedPtr);
213 #endif
214  // We only want to instrument piEnqueueMemUnmap
215  {
216 #ifdef XPTI_ENABLE_INSTRUMENTATION
217  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
218  xpti::utils::finally _{[&] {
219  // There's no way for SYCL to know, when the pointer is freed, so we have
220  // to explicitly wait for the end of data transfers here in order to
221  // report correct events.
222  // Always use call_nocheck here, because call may throw an exception,
223  // and this lambda will be called from destructor, which in combination
224  // rewards us with UB.
225  Plugin.call_nocheck<PiApiKind::piEventsWait>(1, Event);
226  emitMemReleaseEndTrace(MemObjID, Ptr, CorrID);
227  }};
228 #endif
229  Plugin.call<PiApiKind::piEnqueueMemUnmap>(Queue, Mem, MappedPtr, NumEvents,
230  WaitList, Event);
231  }
232 }
233 
234 void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
235  void *MemAllocation,
236  std::vector<EventImplPtr> DepEvents,
237  RT::PiEvent &OutEvent) {
238  // There is no async API for memory releasing. Explicitly wait for all
239  // dependency events and return empty event.
240  waitForEvents(DepEvents);
241  OutEvent = nullptr;
242  XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
243  MemObj->releaseMem(TargetContext, MemAllocation);
244 }
245 
246 void MemoryManager::releaseMemObj(ContextImplPtr TargetContext,
247  SYCLMemObjI *MemObj, void *MemAllocation,
248  void *UserPtr) {
249  if (UserPtr == MemAllocation) {
250  // Do nothing as it's user provided memory.
251  return;
252  }
253 
254  if (TargetContext->is_host()) {
255  MemObj->releaseHostMem(MemAllocation);
256  return;
257  }
258 
259  const detail::plugin &Plugin = TargetContext->getPlugin();
260  memReleaseHelper(Plugin, pi::cast<RT::PiMem>(MemAllocation));
261 }
262 
263 void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
264  bool InitFromUserData, void *HostPtr,
265  std::vector<EventImplPtr> DepEvents,
266  RT::PiEvent &OutEvent) {
267  // There is no async API for memory allocation. Explicitly wait for all
268  // dependency events and return empty event.
269  waitForEvents(DepEvents);
270  OutEvent = nullptr;
271 
272  return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
273  OutEvent);
274 }
275 
276 void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
277  bool HostPtrReadOnly, size_t Size,
278  const sycl::property_list &) {
279  std::ignore = HostPtrReadOnly;
280  std::ignore = Size;
281 
282  // Can return user pointer directly if it is not a nullptr.
283  if (UserPtr)
284  return UserPtr;
285 
286  return MemObj->allocateHostMem();
287  ;
288 }
289 
290 void *MemoryManager::allocateInteropMemObject(
291  ContextImplPtr TargetContext, void *UserPtr,
292  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
293  const sycl::property_list &, RT::PiEvent &OutEventToWait) {
294  (void)TargetContext;
295  (void)InteropContext;
296  // If memory object is created with interop c'tor return cl_mem as is.
297  assert(TargetContext == InteropContext && "Expected matching contexts");
298  OutEventToWait = InteropEvent->getHandleRef();
299  // Retain the event since it will be released during alloca command
300  // destruction
301  if (nullptr != OutEventToWait) {
302  const detail::plugin &Plugin = InteropEvent->getPlugin();
303  Plugin.call<PiApiKind::piEventRetain>(OutEventToWait);
304  }
305  return UserPtr;
306 }
307 
309  bool HostPtrReadOnly) {
310  // Create read_write mem object to handle arbitrary uses.
311  RT::PiMemFlags Result =
313  if (UserPtr)
314  Result |= PI_MEM_FLAGS_HOST_PTR_USE;
315  return Result;
316 }
317 
318 void *MemoryManager::allocateImageObject(ContextImplPtr TargetContext,
319  void *UserPtr, bool HostPtrReadOnly,
320  const RT::PiMemImageDesc &Desc,
321  const RT::PiMemImageFormat &Format,
322  const sycl::property_list &) {
323  RT::PiMemFlags CreationFlags =
324  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
325 
326  RT::PiMem NewMem;
327  const detail::plugin &Plugin = TargetContext->getPlugin();
328  Plugin.call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(),
329  CreationFlags, &Format, &Desc,
330  UserPtr, &NewMem);
331  return NewMem;
332 }
333 
334 void *
335 MemoryManager::allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr,
336  bool HostPtrReadOnly, const size_t Size,
337  const sycl::property_list &PropsList) {
338  RT::PiMemFlags CreationFlags =
339  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
340  if (PropsList.has_property<
341  sycl::ext::oneapi::property::buffer::use_pinned_host_memory>())
342  CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC;
343 
344  RT::PiMem NewMem = nullptr;
345  const detail::plugin &Plugin = TargetContext->getPlugin();
346 
347  if (PropsList.has_property<property::buffer::detail::buffer_location>())
348  if (TargetContext->isBufferLocationSupported()) {
349  auto location =
350  PropsList.get_property<property::buffer::detail::buffer_location>()
351  .get_buffer_location();
353  location, 0};
354  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(),
355  CreationFlags, Size, UserPtr, &NewMem, props);
356  return NewMem;
357  }
358  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags,
359  Size, UserPtr, &NewMem, nullptr);
360  return NewMem;
361 }
362 
363 void *MemoryManager::allocateMemBuffer(ContextImplPtr TargetContext,
364  SYCLMemObjI *MemObj, void *UserPtr,
365  bool HostPtrReadOnly, size_t Size,
366  const EventImplPtr &InteropEvent,
367  const ContextImplPtr &InteropContext,
368  const sycl::property_list &PropsList,
369  RT::PiEvent &OutEventToWait) {
370  void *MemPtr;
371  if (TargetContext->is_host())
372  MemPtr =
373  allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
374  else if (UserPtr && InteropContext)
375  MemPtr =
376  allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
377  InteropContext, PropsList, OutEventToWait);
378  else
379  MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
380  PropsList);
381  XPTIRegistry::bufferAssociateNotification(MemObj, MemPtr);
382  return MemPtr;
383 }
384 
385 void *MemoryManager::allocateMemImage(
386  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
387  bool HostPtrReadOnly, size_t Size, const RT::PiMemImageDesc &Desc,
388  const RT::PiMemImageFormat &Format, const EventImplPtr &InteropEvent,
389  const ContextImplPtr &InteropContext, const sycl::property_list &PropsList,
390  RT::PiEvent &OutEventToWait) {
391  if (TargetContext->is_host())
392  return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
393  PropsList);
394  if (UserPtr && InteropContext)
395  return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
396  InteropContext, PropsList, OutEventToWait);
397  return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
398  Format, PropsList);
399 }
400 
401 void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
402  void *ParentMemObj, size_t ElemSize,
403  size_t Offset, range<3> Range,
404  std::vector<EventImplPtr> DepEvents,
405  RT::PiEvent &OutEvent) {
406  waitForEvents(DepEvents);
407  OutEvent = nullptr;
408 
409  if (TargetContext->is_host())
410  return static_cast<void *>(static_cast<char *>(ParentMemObj) + Offset);
411 
412  size_t SizeInBytes = ElemSize;
413  for (size_t I = 0; I < 3; ++I)
414  SizeInBytes *= Range[I];
415 
416  RT::PiResult Error = PI_SUCCESS;
417  pi_buffer_region_struct Region{Offset, SizeInBytes};
418  RT::PiMem NewMem;
419  const detail::plugin &Plugin = TargetContext->getPlugin();
421  pi::cast<RT::PiMem>(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW,
422  PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem);
423  if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
424  throw invalid_object_error(
425  "Specified offset of the sub-buffer being constructed is not a "
426  "multiple of the memory base address alignment",
427  PI_ERROR_INVALID_VALUE);
428 
429  if (Error != PI_SUCCESS) {
430  Plugin.reportPiError(Error, "allocateMemSubBuffer()");
431  }
432 
433  return NewMem;
434 }
435 
437  int XTerm;
438  int YTerm;
439  int ZTerm;
440 };
443  // For buffers, the offsets/ranges coming from accessor are always
444  // id<3>/range<3> But their organization varies by dimension:
445  // 1 ==> {width, 1, 1}
446  // 2 ==> {height, width, 1}
447  // 3 ==> {depth, height, width}
448  // Some callers schedule 0 as DimDst/DimSrc.
449 
450  if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
451  if (Dimensions == 3) {
452  pos.XTerm = 2, pos.YTerm = 1, pos.ZTerm = 0;
453  } else if (Dimensions == 2) {
454  pos.XTerm = 1, pos.YTerm = 0, pos.ZTerm = 2;
455  } else { // Dimension is 1 or 0
456  pos.XTerm = 0, pos.YTerm = 1, pos.ZTerm = 2;
457  }
458  } else { // While range<>/id<> use by images is different than buffers, it's
459  // consistent with their accessors.
460  pos.XTerm = 0;
461  pos.YTerm = 1;
462  pos.ZTerm = 2;
463  }
464 }
465 
466 void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
467  unsigned int DimSrc, sycl::range<3> SrcSize,
468  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
469  unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr TgtQueue,
470  unsigned int DimDst, sycl::range<3> DstSize,
471  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
472  unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
473  RT::PiEvent &OutEvent) {
474  (void)SrcAccessRange;
475  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
476 
477  const RT::PiQueue Queue = TgtQueue->getHandleRef();
478  const detail::plugin &Plugin = TgtQueue->getPlugin();
479 
481  TermPositions SrcPos, DstPos;
482  prepTermPositions(SrcPos, DimSrc, MemType);
483  prepTermPositions(DstPos, DimDst, MemType);
484 
485  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
486  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
487  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
488  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
489  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
490 
491  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
492  if (1 == DimDst && 1 == DimSrc) {
494  Queue, DstMem,
495  /*blocking_write=*/PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
496  SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
497  } else {
498  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
499  size_t BufferSlicePitch =
500  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
501  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
502  size_t HostSlicePitch =
503  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
504 
505  pi_buff_rect_offset_struct BufferOffset{
506  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
507  pi_buff_rect_offset_struct HostOffset{
508  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
509  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
510  DstAccessRange[DstPos.YTerm],
511  DstAccessRange[DstPos.ZTerm]};
512 
514  Queue, DstMem,
515  /*blocking_write=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
516  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
517  SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
518  }
519  } else {
520  size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
521  size_t InputSlicePitch =
522  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
523 
524  pi_image_offset_struct Origin{DstOffset[DstPos.XTerm],
525  DstOffset[DstPos.YTerm],
526  DstOffset[DstPos.ZTerm]};
527  pi_image_region_struct Region{DstAccessRange[DstPos.XTerm],
528  DstAccessRange[DstPos.YTerm],
529  DstAccessRange[DstPos.ZTerm]};
530 
532  Queue, DstMem,
533  /*blocking_write=*/PI_FALSE, &Origin, &Region, InputRowPitch,
534  InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
535  }
536 }
537 
538 void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
539  unsigned int DimSrc, sycl::range<3> SrcSize,
540  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
541  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
542  unsigned int DimDst, sycl::range<3> DstSize,
543  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
544  unsigned int DstElemSize, std::vector<RT::PiEvent> DepEvents,
545  RT::PiEvent &OutEvent) {
546  (void)DstAccessRange;
547  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
548 
549  const RT::PiQueue Queue = SrcQueue->getHandleRef();
550  const detail::plugin &Plugin = SrcQueue->getPlugin();
551 
553  TermPositions SrcPos, DstPos;
554  prepTermPositions(SrcPos, DimSrc, MemType);
555  prepTermPositions(DstPos, DimDst, MemType);
556 
557  // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
558  // copyD2D) will usually have the same values for AccessRange, Size,
559  // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
560  // Essentially, it schedules a copyBack of chars thus in copyD2H the
561  // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
562  // sized to bytes with a DstElemSize of 1.
563  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
564  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
565  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
566  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
567  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
568 
569  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
570  if (1 == DimDst && 1 == DimSrc) {
572  Queue, SrcMem,
573  /*blocking_read=*/PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
574  DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
575  } else {
576  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
577  size_t BufferSlicePitch =
578  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
579  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
580  size_t HostSlicePitch =
581  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
582 
583  pi_buff_rect_offset_struct BufferOffset{
584  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
585  pi_buff_rect_offset_struct HostOffset{
586  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
587  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
588  SrcAccessRange[SrcPos.YTerm],
589  SrcAccessRange[SrcPos.ZTerm]};
590 
592  Queue, SrcMem,
593  /*blocking_read=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
594  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
595  DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
596  }
597  } else {
598  size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
599  size_t SlicePitch =
600  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
601 
602  pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm],
603  SrcOffset[SrcPos.YTerm],
604  SrcOffset[SrcPos.ZTerm]};
605  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
606  SrcAccessRange[SrcPos.YTerm],
607  SrcAccessRange[SrcPos.ZTerm]};
608 
610  Queue, SrcMem, PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
611  DepEvents.size(), DepEvents.data(), &OutEvent);
612  }
613 }
614 
615 void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
616  unsigned int DimSrc, sycl::range<3> SrcSize,
617  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
618  unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr,
619  unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3>,
620  sycl::id<3> DstOffset, unsigned int DstElemSize,
621  std::vector<RT::PiEvent> DepEvents, RT::PiEvent &OutEvent) {
622  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
623 
624  const RT::PiQueue Queue = SrcQueue->getHandleRef();
625  const detail::plugin &Plugin = SrcQueue->getPlugin();
626 
628  TermPositions SrcPos, DstPos;
629  prepTermPositions(SrcPos, DimSrc, MemType);
630  prepTermPositions(DstPos, DimDst, MemType);
631 
632  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
633  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
634  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
635  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
636  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
637 
638  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
639  if (1 == DimDst && 1 == DimSrc) {
641  Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
642  SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
643  &OutEvent);
644  } else {
645  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
646  // calculate both src and dest pitch using region[0], which is not correct
647  // if src and dest are not the same size.
648  size_t SrcRowPitch = SrcSzWidthBytes;
649  size_t SrcSlicePitch = (DimSrc <= 1)
650  ? SrcSzWidthBytes
651  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
652  size_t DstRowPitch = DstSzWidthBytes;
653  size_t DstSlicePitch = (DimDst <= 1)
654  ? DstSzWidthBytes
655  : DstSzWidthBytes * DstSize[DstPos.YTerm];
656 
657  pi_buff_rect_offset_struct SrcOrigin{
658  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
659  pi_buff_rect_offset_struct DstOrigin{
660  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
661  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
662  SrcAccessRange[SrcPos.YTerm],
663  SrcAccessRange[SrcPos.ZTerm]};
664 
666  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
667  SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
668  DepEvents.data(), &OutEvent);
669  }
670  } else {
671  pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm],
672  SrcOffset[SrcPos.YTerm],
673  SrcOffset[SrcPos.ZTerm]};
674  pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm],
675  DstOffset[DstPos.YTerm],
676  DstOffset[DstPos.ZTerm]};
677  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
678  SrcAccessRange[SrcPos.YTerm],
679  SrcAccessRange[SrcPos.ZTerm]};
680 
682  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
683  DepEvents.size(), DepEvents.data(), &OutEvent);
684  }
685 }
686 
687 static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr,
688  unsigned int DimSrc, sycl::range<3> SrcSize,
689  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
690  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
691  unsigned int DimDst, sycl::range<3> DstSize,
692  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
693  unsigned int DstElemSize, std::vector<RT::PiEvent>,
694  RT::PiEvent &) {
695  if ((DimSrc != 1 || DimDst != 1) &&
696  (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} ||
697  SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
698  throw runtime_error("Not supported configuration of memcpy requested",
699  PI_ERROR_INVALID_OPERATION);
700  }
701 
702  SrcMem += SrcOffset[0] * SrcElemSize;
703  DstMem += DstOffset[0] * DstElemSize;
704 
705  if (SrcMem == DstMem)
706  return;
707 
708  size_t BytesToCopy =
709  SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
710  std::memcpy(DstMem, SrcMem, BytesToCopy);
711 }
712 
713 // Copies memory between: host and device, host and host,
714 // device and device if memory objects bound to the one context.
715 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
716  QueueImplPtr SrcQueue, unsigned int DimSrc,
717  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
718  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
719  void *DstMem, QueueImplPtr TgtQueue,
720  unsigned int DimDst, sycl::range<3> DstSize,
721  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
722  unsigned int DstElemSize,
723  std::vector<RT::PiEvent> DepEvents,
724  RT::PiEvent &OutEvent) {
725 
726  if (SrcQueue->is_host()) {
727  if (TgtQueue->is_host())
728  copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
729  SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem,
730  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
731  DstElemSize, std::move(DepEvents), OutEvent);
732 
733  else
734  copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
735  SrcAccessRange, SrcOffset, SrcElemSize,
736  pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
737  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
738  OutEvent);
739  } else {
740  if (TgtQueue->is_host())
741  copyD2H(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
742  DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
743  (char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
744  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
745  OutEvent);
746  else
747  copyD2D(SYCLMemObj, pi::cast<RT::PiMem>(SrcMem), std::move(SrcQueue),
748  DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize,
749  pi::cast<RT::PiMem>(DstMem), std::move(TgtQueue), DimDst, DstSize,
750  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
751  OutEvent);
752  }
753 }
754 
755 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
756  size_t PatternSize, const char *Pattern,
757  unsigned int Dim, sycl::range<3>, sycl::range<3> Range,
758  sycl::id<3> Offset, unsigned int ElementSize,
759  std::vector<RT::PiEvent> DepEvents,
760  RT::PiEvent &OutEvent) {
761  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
762 
763  const detail::plugin &Plugin = Queue->getPlugin();
764  if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
765  if (Dim == 1) {
767  Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, PatternSize,
768  Offset[0] * ElementSize, Range[0] * ElementSize, DepEvents.size(),
769  DepEvents.data(), &OutEvent);
770  return;
771  }
772  throw runtime_error("Not supported configuration of fill requested",
773  PI_ERROR_INVALID_OPERATION);
774  } else {
776  Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem), Pattern, &Offset[0],
777  &Range[0], DepEvents.size(), DepEvents.data(), &OutEvent);
778  }
779 }
780 
781 void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
782  access::mode AccessMode, unsigned int, sycl::range<3>,
783  sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
784  unsigned int ElementSize,
785  std::vector<RT::PiEvent> DepEvents,
786  RT::PiEvent &OutEvent) {
787  if (Queue->is_host()) {
788  throw runtime_error("Not supported configuration of map requested",
789  PI_ERROR_INVALID_OPERATION);
790  }
791 
792  pi_map_flags Flags = 0;
793 
794  switch (AccessMode) {
795  case access::mode::read:
796  Flags |= PI_MAP_READ;
797  break;
798  case access::mode::write:
799  Flags |= PI_MAP_WRITE;
800  break;
801  case access::mode::read_write:
802  case access::mode::atomic:
803  Flags = PI_MAP_WRITE | PI_MAP_READ;
804  break;
805  case access::mode::discard_write:
806  case access::mode::discard_read_write:
808  break;
809  }
810 
811  AccessOffset[0] *= ElementSize;
812  AccessRange[0] *= ElementSize;
813 
814  // TODO: Handle offset
815  assert(AccessOffset[0] == 0 && "Handle offset");
816 
817  void *MappedPtr = nullptr;
818  const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
819  const detail::plugin &Plugin = Queue->getPlugin();
820  memBufferMapHelper(Plugin, Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem),
821  PI_FALSE, Flags, AccessOffset[0], BytesToMap,
822  DepEvents.size(), DepEvents.data(), &OutEvent, &MappedPtr);
823  return MappedPtr;
824 }
825 
826 void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
827  void *MappedPtr, std::vector<RT::PiEvent> DepEvents,
828  RT::PiEvent &OutEvent) {
829 
830  // Host queue is not supported here.
831  // All DepEvents are to the same Context.
832  // Using the plugin of the Queue.
833 
834  const detail::plugin &Plugin = Queue->getPlugin();
835  memUnmapHelper(Plugin, Queue->getHandleRef(), pi::cast<RT::PiMem>(Mem),
836  MappedPtr, DepEvents.size(), DepEvents.data(), &OutEvent);
837 }
838 
839 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
840  size_t Len, void *DstMem,
841  std::vector<RT::PiEvent> DepEvents,
842  RT::PiEvent *OutEvent) {
843  assert(!SrcQueue->getContextImplPtr()->is_host() &&
844  "Host queue not supported in fill_usm.");
845 
846  if (!Len) { // no-op, but ensure DepEvents will still be waited on
847  if (!DepEvents.empty()) {
848  SrcQueue->getPlugin().call<PiApiKind::piEnqueueEventsWait>(
849  SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
850  OutEvent);
851  }
852  return;
853  }
854 
855  if (!SrcMem || !DstMem)
856  throw runtime_error("NULL pointer argument in memory copy operation.",
857  PI_ERROR_INVALID_VALUE);
858 
859  const detail::plugin &Plugin = SrcQueue->getPlugin();
860  Plugin.call<PiApiKind::piextUSMEnqueueMemcpy>(SrcQueue->getHandleRef(),
861  /* blocking */ false, DstMem,
862  SrcMem, Len, DepEvents.size(),
863  DepEvents.data(), OutEvent);
864 }
865 
866 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
867  int Pattern, std::vector<RT::PiEvent> DepEvents,
868  RT::PiEvent *OutEvent) {
869  assert(!Queue->getContextImplPtr()->is_host() &&
870  "Host queue not supported in fill_usm.");
871 
872  if (!Length) { // no-op, but ensure DepEvents will still be waited on
873  if (!DepEvents.empty()) {
874  Queue->getPlugin().call<PiApiKind::piEnqueueEventsWait>(
875  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
876  }
877  return;
878  }
879 
880  if (!Mem)
881  throw runtime_error("NULL pointer argument in memory fill operation.",
882  PI_ERROR_INVALID_VALUE);
883 
884  const detail::plugin &Plugin = Queue->getPlugin();
886  Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
887  DepEvents.data(), OutEvent);
888 }
889 
890 void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length,
891  std::vector<RT::PiEvent> DepEvents,
892  RT::PiEvent *OutEvent) {
893  assert(!Queue->getContextImplPtr()->is_host() &&
894  "Host queue not supported in prefetch_usm.");
895 
896  const detail::plugin &Plugin = Queue->getPlugin();
898  Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0),
899  DepEvents.size(), DepEvents.data(), OutEvent);
900 }
901 
902 void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue,
903  size_t Length, pi_mem_advice Advice,
904  std::vector<RT::PiEvent> /*DepEvents*/,
905  RT::PiEvent *OutEvent) {
906  assert(!Queue->getContextImplPtr()->is_host() &&
907  "Host queue not supported in advise_usm.");
908 
909  const detail::plugin &Plugin = Queue->getPlugin();
910  Plugin.call<PiApiKind::piextUSMEnqueueMemAdvise>(Queue->getHandleRef(), Mem,
911  Length, Advice, OutEvent);
912 }
913 } // namespace detail
914 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
915 } // namespace sycl
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, RT::PiEvent &InteropEvent)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
void reportPiError(RT::PiResult pi_result, const char *context) const
Definition: plugin.hpp:149
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
#define __SYCL_INLINE_VER_NAMESPACE(X)
::pi_event PiEvent
Definition: pi.hpp:121
::pi_queue PiQueue
Definition: pi.hpp:117
::pi_mem_flags PiMemFlags
Definition: pi.hpp:120
::pi_mem PiMem
Definition: pi.hpp:119
::pi_image_desc PiMemImageDesc
Definition: pi.hpp:128
::pi_image_format PiMemImageFormat
Definition: pi.hpp:127
::pi_result PiResult
Definition: pi.hpp:108
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent >, RT::PiEvent &)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const plugin &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void memUnmapHelper(const plugin &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:42
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, RT::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void memcpy(void *Dst, const void *Src, std::size_t Size)
static RT::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:32
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:109
void memReleaseHelper(const plugin &Plugin, pi_mem Mem)
void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< RT::PiEvent > DepEvents, RT::PiEvent &OutEvent)
void memBufferMapHelper(const plugin &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
class __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:2747
class __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_SPECIAL_CLASS AccessMode
Definition: accessor.hpp:2747
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
Definition: pi.h:545
uintptr_t pi_native_handle
Definition: pi.h:107
pi_bitfield pi_map_flags
Definition: pi.h:552
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_uint32 pi_bool
Definition: pi.h:105
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
Definition: pi.h:549
_pi_mem_advice
Definition: pi.h:422
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_bitfield pi_mem_properties
Definition: pi.h:558
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
_pi_usm_migration_flags
Definition: pi.h:1639
const pi_bool PI_FALSE
Definition: pi.h:477
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_BUFFER_CREATE_TYPE_REGION
Definition: pi.h:474
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
uint32_t pi_uint32
Definition: pi.h:103
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
pi_bitfield pi_mem_flags
Definition: pi.h:542
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
pi_result piMemRelease(pi_mem mem)
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:547
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:554
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
Definition: pi.h:555
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:553
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:544
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
Definition: pi.h:560
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piEventRetain(pi_event event)
MemType
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:160
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:608
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:222
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:393