DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_manager.cpp
Go to the documentation of this file.
1 //==-------------- memory_manager.cpp --------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include <detail/event_impl.hpp>
14 #include <detail/pi_utils.hpp>
15 #include <detail/queue_impl.hpp>
16 #include <detail/xpti_registry.hpp>
17 
18 #include <sycl/usm/usm_enums.hpp>
20 
22 
23 #include <algorithm>
24 #include <cassert>
25 #include <cstring>
26 #include <vector>
27 
28 #ifdef XPTI_ENABLE_INSTRUMENTATION
29 #include <xpti/xpti_data_types.h>
30 #include <xpti/xpti_trace_framework.hpp>
31 #endif
32 
33 namespace sycl {
34 inline namespace _V1 {
35 namespace detail {
36 
37 #ifdef XPTI_ENABLE_INSTRUMENTATION
38 uint8_t GMemAllocStreamID;
39 xpti::trace_event_data_t *GMemAllocEvent;
40 #endif
41 
42 uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize,
43  size_t GuardZone) {
44  (void)ObjHandle;
45  (void)AllocSize;
46  (void)GuardZone;
47  uint64_t CorrelationID = 0;
48 #ifdef XPTI_ENABLE_INSTRUMENTATION
49  constexpr uint16_t NotificationTraceType =
50  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_begin);
51  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
52  xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 /* alloc ptr */, AllocSize,
53  GuardZone};
54 
55  CorrelationID = xptiGetUniqueId();
56  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
57  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
58  }
59 #endif
60  return CorrelationID;
61 }
62 
63 void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
64  size_t AllocSize, size_t GuardZone,
65  uint64_t CorrelationID) {
66  (void)ObjHandle;
67  (void)AllocPtr;
68  (void)AllocSize;
69  (void)GuardZone;
70  (void)CorrelationID;
71 #ifdef XPTI_ENABLE_INSTRUMENTATION
72  constexpr uint16_t NotificationTraceType =
73  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_end);
74  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
75  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
76 
77  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
78  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
79  }
80 #endif
81 }
82 
83 uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr) {
84  (void)ObjHandle;
85  (void)AllocPtr;
86  uint64_t CorrelationID = 0;
87 #ifdef XPTI_ENABLE_INSTRUMENTATION
88  constexpr uint16_t NotificationTraceType =
89  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_begin);
90  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
91  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
92  0 /* guard zone */};
93 
94  CorrelationID = xptiGetUniqueId();
95  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
96  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
97  }
98 #endif
99  return CorrelationID;
100 }
101 
102 void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
103  uint64_t CorrelationID) {
104  (void)ObjHandle;
105  (void)AllocPtr;
106  (void)CorrelationID;
107 #ifdef XPTI_ENABLE_INSTRUMENTATION
108  constexpr uint16_t NotificationTraceType =
109  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end);
110  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
111  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
112  0 /* guard zone */};
113 
114  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
115  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
116  }
117 #endif
118 }
119 
120 static void waitForEvents(const std::vector<EventImplPtr> &Events) {
121  // Assuming all events will be on the same device or
122  // devices associated with the same Backend.
123  if (!Events.empty()) {
124  const PluginPtr &Plugin = Events[0]->getPlugin();
125  std::vector<sycl::detail::pi::PiEvent> PiEvents(Events.size());
126  std::transform(Events.begin(), Events.end(), PiEvents.begin(),
127  [](const EventImplPtr &EventImpl) {
128  return EventImpl->getHandleRef();
129  });
130  Plugin->call<PiApiKind::piEventsWait>(PiEvents.size(), &PiEvents[0]);
131  }
132 }
133 
135  pi_mem_flags Flags, size_t Size, void *HostPtr,
136  pi_mem *RetMem, const pi_mem_properties *Props) {
137 #ifdef XPTI_ENABLE_INSTRUMENTATION
138  uint64_t CorrID = 0;
139 #endif
140  // We only want to instrument piMemBufferCreate
141  {
142 #ifdef XPTI_ENABLE_INSTRUMENTATION
143  CorrID =
144  emitMemAllocBeginTrace(0 /* mem object */, Size, 0 /* guard zone */);
145  xpti::utils::finally _{[&] {
146  // C-style cast is required for MSVC
147  uintptr_t MemObjID = (uintptr_t)(*RetMem);
148  pi_native_handle Ptr = 0;
149  // Always use call_nocheck here, because call may throw an exception,
150  // and this lambda will be called from destructor, which in combination
151  // rewards us with UB.
152  // When doing buffer interop we don't know what device the memory should
153  // be resident on, so pass nullptr for Device param. Buffer interop may
154  // not be supported by all backends.
155  Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(
156  *RetMem, /*Dev*/ nullptr, &Ptr);
157  emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
158  CorrID);
159  }};
160 #endif
161  if (Size)
162  Plugin->call<PiApiKind::piMemBufferCreate>(Ctx, Flags, Size, HostPtr,
163  RetMem, Props);
164  }
165 }
166 
167 void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) {
168  // FIXME piMemRelease does not guarante memory release. It is only true if
169  // reference counter is 1. However, SYCL runtime currently only calls
170  // piMemRetain only for OpenCL interop
171 #ifdef XPTI_ENABLE_INSTRUMENTATION
172  uint64_t CorrID = 0;
173  // C-style cast is required for MSVC
174  uintptr_t MemObjID = (uintptr_t)(Mem);
175  uintptr_t Ptr = 0;
176  // Do not make unnecessary PI calls without instrumentation enabled
177  if (xptiTraceEnabled()) {
178  pi_native_handle PtrHandle = 0;
179  // When doing buffer interop we don't know what device the memory should be
180  // resident on, so pass nullptr for Device param. Buffer interop may not be
181  // supported by all backends.
182  Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, /*Dev*/ nullptr,
183  &PtrHandle);
184  Ptr = (uintptr_t)(PtrHandle);
185  }
186 #endif
187  // We only want to instrument piMemRelease
188  {
189 #ifdef XPTI_ENABLE_INSTRUMENTATION
190  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
191  xpti::utils::finally _{
192  [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }};
193 #endif
194  Plugin->call<PiApiKind::piMemRelease>(Mem);
195  }
196 }
197 
198 void memBufferMapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Buffer,
199  pi_bool Blocking, pi_map_flags Flags, size_t Offset,
200  size_t Size, pi_uint32 NumEvents,
201  const pi_event *WaitList, pi_event *Event,
202  void **RetMap) {
203 #ifdef XPTI_ENABLE_INSTRUMENTATION
204  uint64_t CorrID = 0;
205  uintptr_t MemObjID = (uintptr_t)(Buffer);
206 #endif
207  // We only want to instrument piEnqueueMemBufferMap
208 
209 #ifdef XPTI_ENABLE_INSTRUMENTATION
210  CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */);
211  xpti::utils::finally _{[&] {
212  emitMemAllocEndTrace(MemObjID, (uintptr_t)(*RetMap), Size,
213  0 /* guard zone */, CorrID);
214  }};
215 #endif
216  Plugin->call<PiApiKind::piEnqueueMemBufferMap>(Queue, Buffer, Blocking, Flags,
217  Offset, Size, NumEvents,
218  WaitList, Event, RetMap);
219 }
220 
221 void memUnmapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Mem,
222  void *MappedPtr, pi_uint32 NumEvents,
223  const pi_event *WaitList, pi_event *Event) {
224 #ifdef XPTI_ENABLE_INSTRUMENTATION
225  uint64_t CorrID = 0;
226  uintptr_t MemObjID = (uintptr_t)(Mem);
227  uintptr_t Ptr = (uintptr_t)(MappedPtr);
228 #endif
229  // We only want to instrument piEnqueueMemUnmap
230  {
231 #ifdef XPTI_ENABLE_INSTRUMENTATION
232  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
233  xpti::utils::finally _{[&] {
234  // There's no way for SYCL to know, when the pointer is freed, so we have
235  // to explicitly wait for the end of data transfers here in order to
236  // report correct events.
237  // Always use call_nocheck here, because call may throw an exception,
238  // and this lambda will be called from destructor, which in combination
239  // rewards us with UB.
240  Plugin->call_nocheck<PiApiKind::piEventsWait>(1, Event);
241  emitMemReleaseEndTrace(MemObjID, Ptr, CorrID);
242  }};
243 #endif
244  Plugin->call<PiApiKind::piEnqueueMemUnmap>(Queue, Mem, MappedPtr, NumEvents,
245  WaitList, Event);
246  }
247 }
248 
250  void *MemAllocation,
251  std::vector<EventImplPtr> DepEvents,
252  sycl::detail::pi::PiEvent &OutEvent) {
253  // There is no async API for memory releasing. Explicitly wait for all
254  // dependency events and return empty event.
255  waitForEvents(DepEvents);
256  OutEvent = nullptr;
257  XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
258  MemObj->releaseMem(TargetContext, MemAllocation);
259 }
260 
262  SYCLMemObjI *MemObj, void *MemAllocation,
263  void *UserPtr) {
264  if (UserPtr == MemAllocation) {
265  // Do nothing as it's user provided memory.
266  return;
267  }
268 
269  if (!TargetContext) {
270  MemObj->releaseHostMem(MemAllocation);
271  return;
272  }
273 
274  const PluginPtr &Plugin = TargetContext->getPlugin();
275  memReleaseHelper(Plugin, pi::cast<sycl::detail::pi::PiMem>(MemAllocation));
276 }
277 
279  bool InitFromUserData, void *HostPtr,
280  std::vector<EventImplPtr> DepEvents,
281  sycl::detail::pi::PiEvent &OutEvent) {
282  // There is no async API for memory allocation. Explicitly wait for all
283  // dependency events and return empty event.
284  waitForEvents(DepEvents);
285  OutEvent = nullptr;
286 
287  return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
288  OutEvent);
289 }
290 
291 void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
292  bool HostPtrReadOnly, size_t Size,
293  const sycl::property_list &) {
294  std::ignore = HostPtrReadOnly;
295  std::ignore = Size;
296 
297  // Can return user pointer directly if it is not a nullptr.
298  if (UserPtr)
299  return UserPtr;
300 
301  return MemObj->allocateHostMem();
302 }
303 
305  ContextImplPtr TargetContext, void *UserPtr,
306  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
307  const sycl::property_list &, sycl::detail::pi::PiEvent &OutEventToWait) {
308  (void)TargetContext;
309  (void)InteropContext;
310  // If memory object is created with interop c'tor return cl_mem as is.
311  assert(TargetContext == InteropContext && "Expected matching contexts");
312  OutEventToWait = InteropEvent->getHandleRef();
313  // Retain the event since it will be released during alloca command
314  // destruction
315  if (nullptr != OutEventToWait) {
316  const PluginPtr &Plugin = InteropEvent->getPlugin();
317  Plugin->call<PiApiKind::piEventRetain>(OutEventToWait);
318  }
319  return UserPtr;
320 }
321 
323 getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly) {
324  // Create read_write mem object to handle arbitrary uses.
327  if (UserPtr)
328  Result |= PI_MEM_FLAGS_HOST_PTR_USE;
329  return Result;
330 }
331 
333  ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly,
336  const sycl::property_list &) {
337  sycl::detail::pi::PiMemFlags CreationFlags =
338  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
339 
341  const PluginPtr &Plugin = TargetContext->getPlugin();
342  Plugin->call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(),
343  CreationFlags, &Format, &Desc,
344  UserPtr, &NewMem);
345  return NewMem;
346 }
347 
348 void *
350  bool HostPtrReadOnly, const size_t Size,
351  const sycl::property_list &PropsList) {
352  sycl::detail::pi::PiMemFlags CreationFlags =
353  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
354  if (PropsList.has_property<
356  CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC;
357 
358  sycl::detail::pi::PiMem NewMem = nullptr;
359  const PluginPtr &Plugin = TargetContext->getPlugin();
360 
361  std::vector<pi_mem_properties> AllocProps;
362 
364  TargetContext->isBufferLocationSupported()) {
365  auto Location =
367  .get_buffer_location();
368  AllocProps.reserve(AllocProps.size() + 2);
369  AllocProps.push_back(PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION);
370  AllocProps.push_back(Location);
371  }
372 
373  if (PropsList.has_property<property::buffer::mem_channel>()) {
374  auto Channel =
375  PropsList.get_property<property::buffer::mem_channel>().get_channel();
376  AllocProps.reserve(AllocProps.size() + 2);
377  AllocProps.push_back(PI_MEM_PROPERTIES_CHANNEL);
378  AllocProps.push_back(Channel);
379  }
380 
381  pi_mem_properties *AllocPropsPtr = nullptr;
382  if (!AllocProps.empty()) {
383  // If there are allocation properties, push an end to the list and update
384  // the properties pointer.
385  AllocProps.push_back(0);
386  AllocPropsPtr = AllocProps.data();
387  }
388 
389  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags,
390  Size, UserPtr, &NewMem, AllocPropsPtr);
391  return NewMem;
392 }
393 
395  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
396  bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent,
397  const ContextImplPtr &InteropContext, const sycl::property_list &PropsList,
398  sycl::detail::pi::PiEvent &OutEventToWait) {
399  void *MemPtr;
400  if (!TargetContext)
401  MemPtr =
402  allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
403  else if (UserPtr && InteropContext)
404  MemPtr =
405  allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
406  InteropContext, PropsList, OutEventToWait);
407  else
408  MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
409  PropsList);
411  return MemPtr;
412 }
413 
415  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
416  bool HostPtrReadOnly, size_t Size,
419  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
420  const sycl::property_list &PropsList,
421  sycl::detail::pi::PiEvent &OutEventToWait) {
422  if (!TargetContext)
423  return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
424  PropsList);
425  if (UserPtr && InteropContext)
426  return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
427  InteropContext, PropsList, OutEventToWait);
428  return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
429  Format, PropsList);
430 }
431 
433  void *ParentMemObj, size_t ElemSize,
434  size_t Offset, range<3> Range,
435  std::vector<EventImplPtr> DepEvents,
436  sycl::detail::pi::PiEvent &OutEvent) {
437  waitForEvents(DepEvents);
438  OutEvent = nullptr;
439 
440  if (!TargetContext)
441  return static_cast<void *>(static_cast<char *>(ParentMemObj) + Offset);
442 
443  size_t SizeInBytes = ElemSize;
444  for (size_t I = 0; I < 3; ++I)
445  SizeInBytes *= Range[I];
446 
447  sycl::detail::pi::PiResult Error = PI_SUCCESS;
448  pi_buffer_region_struct Region{Offset, SizeInBytes};
450  const PluginPtr &Plugin = TargetContext->getPlugin();
451  Error = Plugin->call_nocheck<PiApiKind::piMemBufferPartition>(
452  pi::cast<sycl::detail::pi::PiMem>(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW,
453  PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem);
454  if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
455  throw detail::set_pi_error(
457  "Specified offset of the sub-buffer being constructed is not "
458  "a multiple of the memory base address alignment"),
459  Error);
460 
461  if (Error != PI_SUCCESS) {
463  "allocateMemSubBuffer() failed"),
464  Error);
465  }
466 
467  return NewMem;
468 }
469 
471  int XTerm;
472  int YTerm;
473  int ZTerm;
474 };
477  // For buffers, the offsets/ranges coming from accessor are always
478  // id<3>/range<3> But their organization varies by dimension:
479  // 1 ==> {width, 1, 1}
480  // 2 ==> {height, width, 1}
481  // 3 ==> {depth, height, width}
482  // Some callers schedule 0 as DimDst/DimSrc.
483 
484  if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
485  if (Dimensions == 3) {
486  pos.XTerm = 2, pos.YTerm = 1, pos.ZTerm = 0;
487  } else if (Dimensions == 2) {
488  pos.XTerm = 1, pos.YTerm = 0, pos.ZTerm = 2;
489  } else { // Dimension is 1 or 0
490  pos.XTerm = 0, pos.YTerm = 1, pos.ZTerm = 2;
491  }
492  } else { // While range<>/id<> use by images is different than buffers, it's
493  // consistent with their accessors.
494  pos.XTerm = 0;
495  pos.YTerm = 1;
496  pos.ZTerm = 2;
497  }
498 }
499 
500 void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
501  unsigned int DimSrc, sycl::range<3> SrcSize,
502  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
503  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
504  QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize,
505  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
506  unsigned int DstElemSize,
507  std::vector<sycl::detail::pi::PiEvent> DepEvents,
508  sycl::detail::pi::PiEvent &OutEvent,
509  const detail::EventImplPtr &OutEventImpl) {
510  (void)SrcAccessRange;
511  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
512  assert(TgtQueue && "Destination mem object queue must be not nullptr");
513 
514  const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef();
515  const PluginPtr &Plugin = TgtQueue->getPlugin();
516 
517  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
518  TermPositions SrcPos, DstPos;
519  prepTermPositions(SrcPos, DimSrc, MemType);
520  prepTermPositions(DstPos, DimDst, MemType);
521 
522  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
523  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
524  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
525  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
526  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
527 
528  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
529  if (1 == DimDst && 1 == DimSrc) {
530  if (OutEventImpl != nullptr)
531  OutEventImpl->setHostEnqueueTime();
533  Queue, DstMem,
534  /*blocking_write=*/PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
535  SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
536  } else {
537  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
538  size_t BufferSlicePitch =
539  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
540  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
541  size_t HostSlicePitch =
542  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
543 
544  pi_buff_rect_offset_struct BufferOffset{
545  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
546  pi_buff_rect_offset_struct HostOffset{
547  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
548  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
549  DstAccessRange[DstPos.YTerm],
550  DstAccessRange[DstPos.ZTerm]};
551  if (OutEventImpl != nullptr)
552  OutEventImpl->setHostEnqueueTime();
554  Queue, DstMem,
555  /*blocking_write=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
556  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
557  SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
558  }
559  } else {
560  size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
561  size_t InputSlicePitch =
562  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
563 
564  pi_image_offset_struct Origin{DstOffset[DstPos.XTerm],
565  DstOffset[DstPos.YTerm],
566  DstOffset[DstPos.ZTerm]};
567  pi_image_region_struct Region{DstAccessRange[DstPos.XTerm],
568  DstAccessRange[DstPos.YTerm],
569  DstAccessRange[DstPos.ZTerm]};
570  if (OutEventImpl != nullptr)
571  OutEventImpl->setHostEnqueueTime();
572  Plugin->call<PiApiKind::piEnqueueMemImageWrite>(
573  Queue, DstMem,
574  /*blocking_write=*/PI_FALSE, &Origin, &Region, InputRowPitch,
575  InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
576  }
577 }
578 
579 void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
580  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
581  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
582  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
583  unsigned int DimDst, sycl::range<3> DstSize,
584  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
585  unsigned int DstElemSize,
586  std::vector<sycl::detail::pi::PiEvent> DepEvents,
587  sycl::detail::pi::PiEvent &OutEvent,
588  const detail::EventImplPtr &OutEventImpl) {
589  (void)DstAccessRange;
590  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
591  assert(SrcQueue && "Source mem object queue is expected to be not nullptr");
592 
593  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
594  const PluginPtr &Plugin = SrcQueue->getPlugin();
595 
596  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
597  TermPositions SrcPos, DstPos;
598  prepTermPositions(SrcPos, DimSrc, MemType);
599  prepTermPositions(DstPos, DimDst, MemType);
600 
601  // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
602  // copyD2D) will usually have the same values for AccessRange, Size,
603  // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
604  // Essentially, it schedules a copyBack of chars thus in copyD2H the
605  // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
606  // sized to bytes with a DstElemSize of 1.
607  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
608  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
609  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
610  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
611  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
612 
613  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
614  if (1 == DimDst && 1 == DimSrc) {
615  if (OutEventImpl != nullptr)
616  OutEventImpl->setHostEnqueueTime();
617  Plugin->call<PiApiKind::piEnqueueMemBufferRead>(
618  Queue, SrcMem,
619  /*blocking_read=*/PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
620  DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
621  } else {
622  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
623  size_t BufferSlicePitch =
624  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
625  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
626  size_t HostSlicePitch =
627  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
628 
629  pi_buff_rect_offset_struct BufferOffset{
630  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
631  pi_buff_rect_offset_struct HostOffset{
632  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
633  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
634  SrcAccessRange[SrcPos.YTerm],
635  SrcAccessRange[SrcPos.ZTerm]};
636  if (OutEventImpl != nullptr)
637  OutEventImpl->setHostEnqueueTime();
639  Queue, SrcMem,
640  /*blocking_read=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
641  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
642  DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
643  }
644  } else {
645  size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
646  size_t SlicePitch =
647  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
648 
649  pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm],
650  SrcOffset[SrcPos.YTerm],
651  SrcOffset[SrcPos.ZTerm]};
652  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
653  SrcAccessRange[SrcPos.YTerm],
654  SrcAccessRange[SrcPos.ZTerm]};
655  if (OutEventImpl != nullptr)
656  OutEventImpl->setHostEnqueueTime();
657  Plugin->call<PiApiKind::piEnqueueMemImageRead>(
658  Queue, SrcMem, PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
659  DepEvents.size(), DepEvents.data(), &OutEvent);
660  }
661 }
662 
663 void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
664  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
665  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
666  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
667  QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize,
668  sycl::range<3>, sycl::id<3> DstOffset, unsigned int DstElemSize,
669  std::vector<sycl::detail::pi::PiEvent> DepEvents,
670  sycl::detail::pi::PiEvent &OutEvent,
671  const detail::EventImplPtr &OutEventImpl) {
672  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
673  assert(SrcQueue && "Source mem object and target mem object queues are "
674  "expected to be not nullptr");
675 
676  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
677  const PluginPtr &Plugin = SrcQueue->getPlugin();
678 
679  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
680  TermPositions SrcPos, DstPos;
681  prepTermPositions(SrcPos, DimSrc, MemType);
682  prepTermPositions(DstPos, DimDst, MemType);
683 
684  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
685  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
686  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
687  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
688  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
689 
690  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
691  if (1 == DimDst && 1 == DimSrc) {
692  if (OutEventImpl != nullptr)
693  OutEventImpl->setHostEnqueueTime();
694  Plugin->call<PiApiKind::piEnqueueMemBufferCopy>(
695  Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
696  SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
697  &OutEvent);
698  } else {
699  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
700  // calculate both src and dest pitch using region[0], which is not correct
701  // if src and dest are not the same size.
702  size_t SrcRowPitch = SrcSzWidthBytes;
703  size_t SrcSlicePitch = (DimSrc <= 1)
704  ? SrcSzWidthBytes
705  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
706  size_t DstRowPitch = DstSzWidthBytes;
707  size_t DstSlicePitch = (DimDst <= 1)
708  ? DstSzWidthBytes
709  : DstSzWidthBytes * DstSize[DstPos.YTerm];
710 
711  pi_buff_rect_offset_struct SrcOrigin{
712  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
713  pi_buff_rect_offset_struct DstOrigin{
714  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
715  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
716  SrcAccessRange[SrcPos.YTerm],
717  SrcAccessRange[SrcPos.ZTerm]};
718  if (OutEventImpl != nullptr)
719  OutEventImpl->setHostEnqueueTime();
721  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
722  SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
723  DepEvents.data(), &OutEvent);
724  }
725  } else {
726  pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm],
727  SrcOffset[SrcPos.YTerm],
728  SrcOffset[SrcPos.ZTerm]};
729  pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm],
730  DstOffset[DstPos.YTerm],
731  DstOffset[DstPos.ZTerm]};
732  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
733  SrcAccessRange[SrcPos.YTerm],
734  SrcAccessRange[SrcPos.ZTerm]};
735  if (OutEventImpl != nullptr)
736  OutEventImpl->setHostEnqueueTime();
737  Plugin->call<PiApiKind::piEnqueueMemImageCopy>(
738  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
739  DepEvents.size(), DepEvents.data(), &OutEvent);
740  }
741 }
742 
743 static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr,
744  unsigned int DimSrc, sycl::range<3> SrcSize,
745  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
746  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
747  unsigned int DimDst, sycl::range<3> DstSize,
748  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
749  unsigned int DstElemSize,
750  std::vector<sycl::detail::pi::PiEvent>,
752  if ((DimSrc != 1 || DimDst != 1) &&
753  (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} ||
754  SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
755  throw exception(make_error_code(errc::feature_not_supported),
756  "Not supported configuration of memcpy requested");
757  }
758 
759  SrcMem += SrcOffset[0] * SrcElemSize;
760  DstMem += DstOffset[0] * DstElemSize;
761 
762  if (SrcMem == DstMem)
763  return;
764 
765  size_t BytesToCopy =
766  SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
767  std::memcpy(DstMem, SrcMem, BytesToCopy);
768 }
769 
770 // Copies memory between: host and device, host and host,
771 // device and device if memory objects bound to the one context.
772 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
773  QueueImplPtr SrcQueue, unsigned int DimSrc,
774  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
775  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
776  void *DstMem, QueueImplPtr TgtQueue,
777  unsigned int DimDst, sycl::range<3> DstSize,
778  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
779  unsigned int DstElemSize,
780  std::vector<sycl::detail::pi::PiEvent> DepEvents,
781  sycl::detail::pi::PiEvent &OutEvent,
782  const detail::EventImplPtr &OutEventImpl) {
783 
784  if (!SrcQueue) {
785  if (!TgtQueue)
786  copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize,
787  SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, nullptr,
788  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
789  std::move(DepEvents), OutEvent, OutEventImpl);
790  else
791  copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize,
792  SrcAccessRange, SrcOffset, SrcElemSize,
793  pi::cast<sycl::detail::pi::PiMem>(DstMem), std::move(TgtQueue),
794  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
795  std::move(DepEvents), OutEvent, OutEventImpl);
796  } else {
797  if (!TgtQueue)
798  copyD2H(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
799  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
800  SrcElemSize, (char *)DstMem, nullptr, DimDst, DstSize,
801  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
802  OutEvent, OutEventImpl);
803  else
804  copyD2D(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
805  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
806  SrcElemSize, pi::cast<sycl::detail::pi::PiMem>(DstMem),
807  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
808  DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
809  }
810 }
811 
812 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
813  size_t PatternSize, const unsigned char *Pattern,
814  unsigned int Dim, sycl::range<3> MemRange,
815  sycl::range<3> AccRange, sycl::id<3> Offset,
816  unsigned int ElementSize,
817  std::vector<sycl::detail::pi::PiEvent> DepEvents,
818  sycl::detail::pi::PiEvent &OutEvent,
819  const detail::EventImplPtr &OutEventImpl) {
820  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
821  assert(Queue && "Fill should be called only with a valid device queue");
822 
823  const PluginPtr &Plugin = Queue->getPlugin();
824 
825  if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
826  if (OutEventImpl != nullptr)
827  OutEventImpl->setHostEnqueueTime();
828 
829  // 2D and 3D buffers accessors can't have custom range or the data will
830  // likely be discontiguous.
831  bool RangesUsable = (Dim <= 1) || (MemRange == AccRange);
832  // For 2D and 3D buffers, the offset must be 0, or the data will be
833  // discontiguous.
834  bool OffsetUsable = (Dim <= 1) || (Offset == sycl::id<3>{0, 0, 0});
835  size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2];
836 
837  if (RangesUsable && OffsetUsable) {
838  Plugin->call<PiApiKind::piEnqueueMemBufferFill>(
839  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem),
840  Pattern, PatternSize, Offset[0] * ElementSize,
841  RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(),
842  &OutEvent);
843  return;
844  }
845  // The sycl::handler uses a parallel_for kernel in the case of unusable
846  // Range or Offset, not CG:Fill. So we should not be here.
847  throw exception(make_error_code(errc::runtime),
848  "Not supported configuration of fill requested");
849  } else {
850  if (OutEventImpl != nullptr)
851  OutEventImpl->setHostEnqueueTime();
852  // images don't support offset accessors and thus avoid issues of
853  // discontinguous data
854  Plugin->call<PiApiKind::piEnqueueMemImageFill>(
855  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
856  &Offset[0], &AccRange[0], DepEvents.size(), DepEvents.data(),
857  &OutEvent);
858  }
859 }
860 
861 void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
862  access::mode AccessMode, unsigned int, sycl::range<3>,
863  sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
864  unsigned int ElementSize,
865  std::vector<sycl::detail::pi::PiEvent> DepEvents,
866  sycl::detail::pi::PiEvent &OutEvent) {
867  if (!Queue) {
868  throw exception(make_error_code(errc::runtime),
869  "Not supported configuration of map requested");
870  }
871 
872  pi_map_flags Flags = 0;
873 
874  switch (AccessMode) {
875  case access::mode::read:
876  Flags |= PI_MAP_READ;
877  break;
878  case access::mode::write:
879  Flags |= PI_MAP_WRITE;
880  break;
882  case access::mode::atomic:
883  Flags = PI_MAP_WRITE | PI_MAP_READ;
884  break;
885  case access::mode::discard_write:
886  case access::mode::discard_read_write:
888  break;
889  }
890 
891  AccessOffset[0] *= ElementSize;
892  AccessRange[0] *= ElementSize;
893 
894  // TODO: Handle offset
895  assert(AccessOffset[0] == 0 && "Handle offset");
896 
897  void *MappedPtr = nullptr;
898  const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
899  const PluginPtr &Plugin = Queue->getPlugin();
900  memBufferMapHelper(Plugin, Queue->getHandleRef(),
901  pi::cast<sycl::detail::pi::PiMem>(Mem), PI_FALSE, Flags,
902  AccessOffset[0], BytesToMap, DepEvents.size(),
903  DepEvents.data(), &OutEvent, &MappedPtr);
904  return MappedPtr;
905 }
906 
908  void *MappedPtr,
909  std::vector<sycl::detail::pi::PiEvent> DepEvents,
910  sycl::detail::pi::PiEvent &OutEvent) {
911 
912  // Execution on host is not supported here.
913  if (!Queue) {
914  throw exception(make_error_code(errc::runtime),
915  "Not supported configuration of unmap requested");
916  }
917  // All DepEvents are to the same Context.
918  // Using the plugin of the Queue.
919 
920  const PluginPtr &Plugin = Queue->getPlugin();
921  memUnmapHelper(Plugin, Queue->getHandleRef(),
922  pi::cast<sycl::detail::pi::PiMem>(Mem), MappedPtr,
923  DepEvents.size(), DepEvents.data(), &OutEvent);
924 }
925 
926 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
927  size_t Len, void *DstMem,
928  std::vector<sycl::detail::pi::PiEvent> DepEvents,
929  sycl::detail::pi::PiEvent *OutEvent,
930  const detail::EventImplPtr &OutEventImpl) {
931  assert(SrcQueue && "USM copy must be called with a valid device queue");
932  if (!Len) { // no-op, but ensure DepEvents will still be waited on
933  if (!DepEvents.empty()) {
934  if (OutEventImpl != nullptr)
935  OutEventImpl->setHostEnqueueTime();
936  SrcQueue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
937  SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
938  OutEvent);
939  }
940  return;
941  }
942 
943  if (!SrcMem || !DstMem)
944  throw exception(make_error_code(errc::invalid),
945  "NULL pointer argument in memory copy operation.");
946 
947  const PluginPtr &Plugin = SrcQueue->getPlugin();
948  if (OutEventImpl != nullptr)
949  OutEventImpl->setHostEnqueueTime();
950  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
951  SrcQueue->getHandleRef(),
952  /* blocking */ PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(),
953  DepEvents.data(), OutEvent);
954 }
955 
956 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
957  const std::vector<unsigned char> &Pattern,
958  std::vector<sycl::detail::pi::PiEvent> DepEvents,
959  sycl::detail::pi::PiEvent *OutEvent,
960  const detail::EventImplPtr &OutEventImpl) {
961  assert(Queue && "USM fill must be called with a valid device queue");
962  if (!Length) { // no-op, but ensure DepEvents will still be waited on
963  if (!DepEvents.empty()) {
964  if (OutEventImpl != nullptr)
965  OutEventImpl->setHostEnqueueTime();
966  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
967  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
968  }
969  return;
970  }
971 
972  if (!Mem)
973  throw exception(make_error_code(errc::invalid),
974  "NULL pointer argument in memory fill operation.");
975  if (OutEventImpl != nullptr)
976  OutEventImpl->setHostEnqueueTime();
977  const PluginPtr &Plugin = Queue->getPlugin();
978  Plugin->call<PiApiKind::piextUSMEnqueueFill>(
979  Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length,
980  DepEvents.size(), DepEvents.data(), OutEvent);
981 }
982 
983 void MemoryManager::prefetch_usm(
984  void *Mem, QueueImplPtr Queue, size_t Length,
985  std::vector<sycl::detail::pi::PiEvent> DepEvents,
986  sycl::detail::pi::PiEvent *OutEvent,
987  const detail::EventImplPtr &OutEventImpl) {
988  assert(Queue && "USM prefetch must be called with a valid device queue");
989  const PluginPtr &Plugin = Queue->getPlugin();
990  if (OutEventImpl != nullptr)
991  OutEventImpl->setHostEnqueueTime();
993  Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0),
994  DepEvents.size(), DepEvents.data(), OutEvent);
995 }
996 
997 void MemoryManager::advise_usm(
998  const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice,
999  std::vector<sycl::detail::pi::PiEvent> /*DepEvents*/,
1000  sycl::detail::pi::PiEvent *OutEvent,
1001  const detail::EventImplPtr &OutEventImpl) {
1002  assert(Queue && "USM advise must be called with a valid device queue");
1003  const PluginPtr &Plugin = Queue->getPlugin();
1004  if (OutEventImpl != nullptr)
1005  OutEventImpl->setHostEnqueueTime();
1006  Plugin->call<PiApiKind::piextUSMEnqueueMemAdvise>(Queue->getHandleRef(), Mem,
1007  Length, Advice, OutEvent);
1008 }
1009 
1010 void MemoryManager::copy_2d_usm(
1011  const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem,
1012  size_t DstPitch, size_t Width, size_t Height,
1013  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1014  sycl::detail::pi::PiEvent *OutEvent,
1015  const detail::EventImplPtr &OutEventImpl) {
1016  assert(Queue && "USM copy 2d must be called with a valid device queue");
1017  if (Width == 0 || Height == 0) {
1018  // no-op, but ensure DepEvents will still be waited on
1019  if (!DepEvents.empty()) {
1020  if (OutEventImpl != nullptr)
1021  OutEventImpl->setHostEnqueueTime();
1022  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1023  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1024  }
1025  return;
1026  }
1027 
1028  if (!DstMem || !SrcMem)
1029  throw sycl::exception(sycl::make_error_code(errc::invalid),
1030  "NULL pointer argument in 2D memory copy operation.");
1031 
1032  const PluginPtr &Plugin = Queue->getPlugin();
1033 
1034  pi_bool SupportsUSMMemcpy2D = false;
1036  Queue->getContextImplPtr()->getHandleRef(),
1038  &SupportsUSMMemcpy2D, nullptr);
1039 
1040  if (SupportsUSMMemcpy2D) {
1041  if (OutEventImpl != nullptr)
1042  OutEventImpl->setHostEnqueueTime();
1043  // Direct memcpy2D is supported so we use this function.
1044  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy2D>(
1045  Queue->getHandleRef(), /*blocking=*/PI_FALSE, DstMem, DstPitch, SrcMem,
1046  SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1047  return;
1048  }
1049 
1050  // Otherwise we allow the special case where the copy is to or from host.
1051 #ifndef NDEBUG
1052  context Ctx = createSyclObjFromImpl<context>(Queue->getContextImplPtr());
1053  usm::alloc SrcAllocType = get_pointer_type(SrcMem, Ctx);
1054  usm::alloc DstAllocType = get_pointer_type(DstMem, Ctx);
1055  bool SrcIsHost =
1056  SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
1057  bool DstIsHost =
1058  DstAllocType == usm::alloc::unknown || DstAllocType == usm::alloc::host;
1059  assert((SrcIsHost || DstIsHost) && "In fallback path for copy_2d_usm either "
1060  "source or destination must be on host.");
1061 #endif // NDEBUG
1062 
1063  // The fallback in this case is to insert a copy per row.
1064  std::vector<OwnedPiEvent> CopyEventsManaged;
1065  CopyEventsManaged.reserve(Height);
1066  // We'll need continuous range of events for a wait later as well.
1067  std::vector<sycl::detail::pi::PiEvent> CopyEvents(Height);
1068  if (OutEventImpl != nullptr)
1069  OutEventImpl->setHostEnqueueTime();
1070  for (size_t I = 0; I < Height; ++I) {
1071  char *DstItBegin = static_cast<char *>(DstMem) + I * DstPitch;
1072  const char *SrcItBegin = static_cast<const char *>(SrcMem) + I * SrcPitch;
1073  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
1074  Queue->getHandleRef(), /* blocking */ PI_FALSE, DstItBegin, SrcItBegin,
1075  Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I);
1076  CopyEventsManaged.emplace_back(CopyEvents[I], Plugin,
1077  /*TakeOwnership=*/true);
1078  }
1079  if (OutEventImpl != nullptr)
1080  OutEventImpl->setHostEnqueueTime();
1081  // Then insert a wait to coalesce the copy events.
1082  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1083  Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent);
1084 }
1085 
1086 void MemoryManager::fill_2d_usm(
1087  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1088  const std::vector<unsigned char> &Pattern,
1089  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1090  sycl::detail::pi::PiEvent *OutEvent,
1091  const detail::EventImplPtr &OutEventImpl) {
1092  assert(Queue && "USM fill 2d must be called with a valid device queue");
1093  if (Width == 0 || Height == 0) {
1094  // no-op, but ensure DepEvents will still be waited on
1095  if (!DepEvents.empty()) {
1096  if (OutEventImpl != nullptr)
1097  OutEventImpl->setHostEnqueueTime();
1098  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1099  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1100  }
1101  return;
1102  }
1103 
1104  if (!DstMem)
1105  throw sycl::exception(sycl::make_error_code(errc::invalid),
1106  "NULL pointer argument in 2D memory fill operation.");
1107  if (OutEventImpl != nullptr)
1108  OutEventImpl->setHostEnqueueTime();
1109  const PluginPtr &Plugin = Queue->getPlugin();
1110  Plugin->call<PiApiKind::piextUSMEnqueueFill2D>(
1111  Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(),
1112  Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1113 }
1114 
1115 void MemoryManager::memset_2d_usm(
1116  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1117  char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1118  sycl::detail::pi::PiEvent *OutEvent,
1119  const detail::EventImplPtr &OutEventImpl) {
1120  assert(Queue && "USM memset 2d must be called with a valid device queue");
1121  if (Width == 0 || Height == 0) {
1122  // no-op, but ensure DepEvents will still be waited on
1123  if (!DepEvents.empty()) {
1124  if (OutEventImpl != nullptr)
1125  OutEventImpl->setHostEnqueueTime();
1126  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1127  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1128  }
1129  return;
1130  }
1131 
1132  if (!DstMem)
1133  throw sycl::exception(
1134  sycl::make_error_code(errc::invalid),
1135  "NULL pointer argument in 2D memory memset operation.");
1136  if (OutEventImpl != nullptr)
1137  OutEventImpl->setHostEnqueueTime();
1138  const PluginPtr &Plugin = Queue->getPlugin();
1139  Plugin->call<PiApiKind::piextUSMEnqueueMemset2D>(
1140  Queue->getHandleRef(), DstMem, Pitch, static_cast<int>(Value), Width,
1141  Height, DepEvents.size(), DepEvents.data(), OutEvent);
1142 }
1143 
1144 static void
1146  DeviceGlobalMapEntry *DeviceGlobalEntry,
1147  size_t NumBytes, size_t Offset, const void *Src,
1148  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1149  sycl::detail::pi::PiEvent *OutEvent,
1150  const detail::EventImplPtr &OutEventImpl) {
1151  assert(Queue &&
1152  "Copy to device global USM must be called with a valid device queue");
1153  // Get or allocate USM memory for the device_global.
1154  DeviceGlobalUSMMem &DeviceGlobalUSM =
1155  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1156  void *Dest = DeviceGlobalUSM.getPtr();
1157 
1158  // OwnedPiEvent will keep the initialization event alive for the duration
1159  // of this function call.
1160  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1161 
1162  // We may need addtional events, so create a non-const dependency events list
1163  // to use if we need to modify it.
1164  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1165  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1166  ZIEvent ? AuxDepEventsStorage : DepEvents;
1167 
1168  // If there is a zero-initializer event the memory operation should wait for
1169  // it.
1170  if (ZIEvent) {
1171  AuxDepEventsStorage = DepEvents;
1172  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1173  }
1174 
1175  MemoryManager::copy_usm(Src, Queue, NumBytes,
1176  reinterpret_cast<char *>(Dest) + Offset,
1177  ActualDepEvents, OutEvent, OutEventImpl);
1178 }
1179 
1181  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1182  size_t NumBytes, size_t Offset, void *Dest,
1183  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1184  sycl::detail::pi::PiEvent *OutEvent,
1185  const detail::EventImplPtr &OutEventImpl) {
1186  // Get or allocate USM memory for the device_global. Since we are reading from
1187  // it, we need it initialized if it has not been yet.
1188  DeviceGlobalUSMMem &DeviceGlobalUSM =
1189  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1190  void *Src = DeviceGlobalUSM.getPtr();
1191 
1192  // OwnedPiEvent will keep the initialization event alive for the duration
1193  // of this function call.
1194  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1195 
1196  // We may need addtional events, so create a non-const dependency events list
1197  // to use if we need to modify it.
1198  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1199  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1200  ZIEvent ? AuxDepEventsStorage : DepEvents;
1201 
1202  // If there is a zero-initializer event the memory operation should wait for
1203  // it.
1204  if (ZIEvent) {
1205  AuxDepEventsStorage = DepEvents;
1206  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1207  }
1208 
1209  MemoryManager::copy_usm(reinterpret_cast<const char *>(Src) + Offset, Queue,
1210  NumBytes, Dest, ActualDepEvents, OutEvent,
1211  OutEventImpl);
1212 }
1213 
1216  DeviceGlobalMapEntry *DeviceGlobalEntry) {
1217  assert(DeviceGlobalEntry->MIsDeviceImageScopeDecorated &&
1218  "device_global is not device image scope decorated.");
1219 
1220  // If the device global is used in multiple device images we cannot proceed.
1221  if (DeviceGlobalEntry->MImageIdentifiers.size() > 1)
1222  throw sycl::exception(make_error_code(errc::invalid),
1223  "More than one image exists with the device_global.");
1224 
1225  // If there are no kernels using the device_global we cannot proceed.
1226  if (DeviceGlobalEntry->MImageIdentifiers.size() == 0)
1227  throw sycl::exception(make_error_code(errc::invalid),
1228  "No image exists with the device_global.");
1229 
1230  // Look for cached programs with the device_global.
1231  device Device = Queue->get_device();
1232  ContextImplPtr ContextImpl = Queue->getContextImplPtr();
1233  std::optional<sycl::detail::pi::PiProgram> CachedProgram =
1234  ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry);
1235  if (CachedProgram)
1236  return *CachedProgram;
1237 
1238  // If there was no cached program, build one.
1239  auto Context = createSyclObjFromImpl<context>(ContextImpl);
1240  ProgramManager &PM = ProgramManager::getInstance();
1241  RTDeviceBinaryImage &Img =
1242  PM.getDeviceImage(DeviceGlobalEntry->MImages, Context, Device);
1243  device_image_plain DeviceImage =
1244  PM.getDeviceImageFromBinaryImage(&Img, Context, Device);
1245  device_image_plain BuiltImage = PM.build(DeviceImage, {Device}, {});
1246  return getSyclObjImpl(BuiltImage)->get_program_ref();
1247 }
1248 
1250  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1251  size_t NumBytes, size_t Offset, const void *Src,
1252  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1253  sycl::detail::pi::PiEvent *OutEvent) {
1254  assert(
1255  Queue &&
1256  "Direct copy to device global must be called with a valid device queue");
1257  sycl::detail::pi::PiProgram Program =
1258  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1259  const PluginPtr &Plugin = Queue->getPlugin();
1261  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1262  false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(),
1263  OutEvent);
1264 }
1265 
1267  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1268  size_t NumBytes, size_t Offset, void *Dest,
1269  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1270  sycl::detail::pi::PiEvent *OutEvent) {
1271  assert(Queue && "Direct copy from device global must be called with a valid "
1272  "device queue");
1273  sycl::detail::pi::PiProgram Program =
1274  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1275  const PluginPtr &Plugin = Queue->getPlugin();
1277  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1278  false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(),
1279  OutEvent);
1280 }
1281 
1282 void MemoryManager::copy_to_device_global(
1283  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1284  size_t NumBytes, size_t Offset, const void *SrcMem,
1285  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1286  sycl::detail::pi::PiEvent *OutEvent,
1287  const detail::EventImplPtr &OutEventImpl) {
1288  DeviceGlobalMapEntry *DGEntry =
1289  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1290  DeviceGlobalPtr);
1291  assert(DGEntry &&
1292  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1293  "Invalid copy operation for device_global.");
1294  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1295  "Copy to device_global is out of bounds.");
1296 
1297  if (IsDeviceImageScoped)
1298  memcpyToDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, SrcMem,
1299  DepEvents, OutEvent);
1300  else
1301  memcpyToDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, SrcMem, DepEvents,
1302  OutEvent, OutEventImpl);
1303 }
1304 
1305 void MemoryManager::copy_from_device_global(
1306  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1307  size_t NumBytes, size_t Offset, void *DstMem,
1308  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1309  sycl::detail::pi::PiEvent *OutEvent,
1310  const detail::EventImplPtr &OutEventImpl) {
1311  DeviceGlobalMapEntry *DGEntry =
1312  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1313  DeviceGlobalPtr);
1314  assert(DGEntry &&
1315  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1316  "Invalid copy operation for device_global.");
1317  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1318  "Copy from device_global is out of bounds.");
1319 
1320  if (IsDeviceImageScoped)
1321  memcpyFromDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, DstMem,
1322  DepEvents, OutEvent);
1323  else
1324  memcpyFromDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, DstMem,
1325  DepEvents, OutEvent, OutEventImpl);
1326 }
1327 
1328 // Command buffer methods
1329 void MemoryManager::ext_oneapi_copyD2D_cmd_buffer(
1331  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1332  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1333  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1334  unsigned int SrcElemSize, void *DstMem, unsigned int DimDst,
1335  sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1336  sycl::id<3> DstOffset, unsigned int DstElemSize,
1337  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1338  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1339  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1340  (void)DstAccessRange;
1341 
1342  const PluginPtr &Plugin = Context->getPlugin();
1343 
1344  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1345  TermPositions SrcPos, DstPos;
1346  prepTermPositions(SrcPos, DimSrc, MemType);
1347  prepTermPositions(DstPos, DimDst, MemType);
1348 
1349  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1350  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1351  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1352  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1353  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1354 
1355  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1356  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1357  "Images are not supported in Graphs");
1358  }
1359 
1360  if (1 == DimDst && 1 == DimSrc) {
1362  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1363  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), SrcXOffBytes,
1364  DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(),
1365  OutSyncPoint);
1366  } else {
1367  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
1368  // calculate both src and dest pitch using region[0], which is not correct
1369  // if src and dest are not the same size.
1370  size_t SrcRowPitch = SrcSzWidthBytes;
1371  size_t SrcSlicePitch = (DimSrc <= 1)
1372  ? SrcSzWidthBytes
1373  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
1374  size_t DstRowPitch = DstSzWidthBytes;
1375  size_t DstSlicePitch = (DimDst <= 1)
1376  ? DstSzWidthBytes
1377  : DstSzWidthBytes * DstSize[DstPos.YTerm];
1378 
1379  pi_buff_rect_offset_struct SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1380  SrcOffset[SrcPos.ZTerm]};
1381  pi_buff_rect_offset_struct DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm],
1382  DstOffset[DstPos.ZTerm]};
1383  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
1384  SrcAccessRange[SrcPos.YTerm],
1385  SrcAccessRange[SrcPos.ZTerm]};
1386 
1388  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1389  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), &SrcOrigin,
1390  &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch,
1391  DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint);
1392  }
1393 }
1394 
1395 void MemoryManager::ext_oneapi_copyD2H_cmd_buffer(
1397  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1398  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1399  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1400  unsigned int SrcElemSize, char *DstMem, unsigned int DimDst,
1401  sycl::range<3> DstSize, sycl::id<3> DstOffset, unsigned int DstElemSize,
1402  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1403  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1404  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1405 
1406  const PluginPtr &Plugin = Context->getPlugin();
1407 
1408  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1409  TermPositions SrcPos, DstPos;
1410  prepTermPositions(SrcPos, DimSrc, MemType);
1411  prepTermPositions(DstPos, DimDst, MemType);
1412 
1413  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1414  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1415  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1416  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1417  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1418 
1419  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1420  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1421  "Images are not supported in Graphs");
1422  }
1423 
1424  if (1 == DimDst && 1 == DimSrc) {
1425  pi_result Result =
1426  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferRead>(
1427  CommandBuffer,
1428  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1429  SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes,
1430  Deps.size(), Deps.data(), OutSyncPoint);
1431 
1432  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1433  throw sycl::exception(
1434  sycl::make_error_code(sycl::errc::feature_not_supported),
1435  "Device-to-host buffer copy command not supported by graph backend");
1436  } else {
1437  Plugin->checkPiResult(Result);
1438  }
1439  } else {
1440  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1441  size_t BufferSlicePitch =
1442  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1443  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1444  size_t HostSlicePitch =
1445  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1446 
1447  pi_buff_rect_offset_struct BufferOffset{
1448  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
1449  pi_buff_rect_offset_struct HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm],
1450  DstOffset[DstPos.ZTerm]};
1451  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
1452  SrcAccessRange[SrcPos.YTerm],
1453  SrcAccessRange[SrcPos.ZTerm]};
1454 
1455  pi_result Result =
1456  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferReadRect>(
1457  CommandBuffer,
1458  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1459  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1460  BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(),
1461  Deps.data(), OutSyncPoint);
1462  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1463  throw sycl::exception(
1464  sycl::make_error_code(sycl::errc::feature_not_supported),
1465  "Device-to-host buffer copy command not supported by graph backend");
1466  } else {
1467  Plugin->checkPiResult(Result);
1468  }
1469  }
1470 }
1471 
1472 void MemoryManager::ext_oneapi_copyH2D_cmd_buffer(
1474  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1475  char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1476  sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem,
1477  unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1478  sycl::id<3> DstOffset, unsigned int DstElemSize,
1479  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1480  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1481  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1482 
1483  const PluginPtr &Plugin = Context->getPlugin();
1484 
1485  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1486  TermPositions SrcPos, DstPos;
1487  prepTermPositions(SrcPos, DimSrc, MemType);
1488  prepTermPositions(DstPos, DimDst, MemType);
1489 
1490  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1491  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1492  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
1493  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1494  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1495 
1496  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1497  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1498  "Images are not supported in Graphs");
1499  }
1500 
1501  if (1 == DimDst && 1 == DimSrc) {
1502  pi_result Result =
1503  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferWrite>(
1504  CommandBuffer,
1505  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1506  DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes,
1507  Deps.size(), Deps.data(), OutSyncPoint);
1508 
1509  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1510  throw sycl::exception(
1511  sycl::make_error_code(sycl::errc::feature_not_supported),
1512  "Host-to-device buffer copy command not supported by graph backend");
1513  } else {
1514  Plugin->checkPiResult(Result);
1515  }
1516  } else {
1517  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1518  size_t BufferSlicePitch =
1519  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1520  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1521  size_t HostSlicePitch =
1522  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1523 
1524  pi_buff_rect_offset_struct BufferOffset{
1525  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
1526  pi_buff_rect_offset_struct HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1527  SrcOffset[SrcPos.ZTerm]};
1528  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
1529  DstAccessRange[DstPos.YTerm],
1530  DstAccessRange[DstPos.ZTerm]};
1531 
1532  pi_result Result =
1534  CommandBuffer,
1535  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1536  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1537  BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(),
1538  Deps.data(), OutSyncPoint);
1539 
1540  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1541  throw sycl::exception(
1542  sycl::make_error_code(sycl::errc::feature_not_supported),
1543  "Host-to-device buffer copy command not supported by graph backend");
1544  } else {
1545  Plugin->checkPiResult(Result);
1546  }
1547  }
1548 }
1549 
1550 void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
1551  ContextImplPtr Context, const void *SrcMem,
1552  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, size_t Len,
1553  void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1554  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1555  if (!SrcMem || !DstMem)
1556  throw exception(make_error_code(errc::invalid),
1557  "NULL pointer argument in memory copy operation.");
1558 
1559  const PluginPtr &Plugin = Context->getPlugin();
1560  pi_result Result =
1561  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemcpyUSM>(
1562  CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(),
1563  OutSyncPoint);
1564  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1565  throw sycl::exception(
1566  sycl::make_error_code(sycl::errc::feature_not_supported),
1567  "USM copy command not supported by graph backend");
1568  } else {
1569  Plugin->checkPiResult(Result);
1570  }
1571 }
1572 
1573 void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
1575  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem,
1576  size_t Len, const std::vector<unsigned char> &Pattern,
1577  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1578  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1579 
1580  if (!DstMem)
1581  throw exception(make_error_code(errc::invalid),
1582  "NULL pointer argument in memory fill operation.");
1583 
1584  const PluginPtr &Plugin = Context->getPlugin();
1585 
1587  CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(),
1588  Deps.data(), OutSyncPoint);
1589 }
1590 
1591 void MemoryManager::ext_oneapi_fill_cmd_buffer(
1593  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1594  void *Mem, size_t PatternSize, const unsigned char *Pattern,
1595  unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange,
1596  sycl::id<3> AccessOffset, unsigned int ElementSize,
1597  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1598  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1599  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1600 
1601  const PluginPtr &Plugin = Context->getPlugin();
1602  if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
1603  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1604  "Images are not supported in Graphs");
1605  }
1606 
1607  // 2D and 3D buffers accessors can't have custom range or the data will
1608  // likely be discontiguous.
1609  bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
1610  // For 2D and 3D buffers, the offset must be 0, or the data will be
1611  // discontiguous.
1612  bool OffsetUsable = (Dim <= 1) || (AccessOffset == sycl::id<3>{0, 0, 0});
1613  size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];
1614 
1615  if (RangesUsable && OffsetUsable) {
1617  CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
1618  PatternSize, AccessOffset[0] * ElementSize,
1619  RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
1620  return;
1621  }
1622  // The sycl::handler uses a parallel_for kernel in the case of unusable
1623  // Range or Offset, not CG:Fill. So we should not be here.
1624  throw exception(make_error_code(errc::runtime),
1625  "Not supported configuration of fill requested");
1626 }
1627 
1628 void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer(
1630  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem,
1631  size_t Length, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1632  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1633  const PluginPtr &Plugin = Context->getPlugin();
1635  CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(),
1636  Deps.data(), OutSyncPoint);
1637 }
1638 
1639 void MemoryManager::ext_oneapi_advise_usm_cmd_buffer(
1641  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const void *Mem,
1642  size_t Length, pi_mem_advice Advice,
1643  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1644  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1645  const PluginPtr &Plugin = Context->getPlugin();
1647  CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(),
1648  OutSyncPoint);
1649 }
1650 
1651 void MemoryManager::copy_image_bindless(
1652  const void *Src, QueueImplPtr Queue, void *Dst,
1654  const sycl::detail::pi::PiMemImageFormat &Format,
1660  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1661  sycl::detail::pi::PiEvent *OutEvent) {
1662  assert(Queue &&
1663  "Copy image bindless must be called with a valid device queue");
1664  assert((Flags == (sycl::detail::pi::PiImageCopyFlags)
1670  "Invalid flags passed to copy_image_bindless.");
1671  if (!Dst || !Src)
1672  throw sycl::exception(
1673  sycl::make_error_code(errc::invalid),
1674  "NULL pointer argument in bindless image copy operation.");
1675 
1676  const detail::PluginPtr &Plugin = Queue->getPlugin();
1677  Plugin->call<PiApiKind::piextMemImageCopy>(
1678  Queue->getHandleRef(), Dst, Src, &Format, &Desc, Flags, &SrcOffset,
1679  &DstOffset, &CopyExtent, &HostExtent, DepEvents.size(), DepEvents.data(),
1680  OutEvent);
1681 }
1682 
1683 } // namespace detail
1684 } // namespace _V1
1685 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
static void * allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range< 3 > Range, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const sycl::property_list &PropsList)
static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList)
static void * allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList)
static void * allocateMemBuffer(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &InteropEvent)=0
static void bufferAssociateNotification(const void *, const void *)
static void bufferReleaseNotification(const void *, const void *)
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
size_t size() const
Definition: range.hpp:56
::pi_mem_flags PiMemFlags
Definition: pi.hpp:121
::pi_ext_sync_point PiExtSyncPoint
Definition: pi.hpp:135
static void memcpyToDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
static void memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent >, sycl::detail::pi::PiEvent &, const detail::EventImplPtr &)
void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
exception set_pi_error(exception &&e, pi_int32 pi_err)
Definition: exception.cpp:70
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< event_impl > EventImplPtr
Definition: handler.hpp:184
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:47
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static sycl::detail::pi::PiProgram getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:131
static sycl::detail::pi::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
void unmap(const void *Ptr, size_t NumBytes, const context &SyclContext)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:536
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:64
Definition: access.hpp:18
pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_cuda.cpp:236
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
Definition: pi.h:832
uintptr_t pi_native_handle
Definition: pi.h:253
pi_bitfield pi_map_flags
Definition: pi.h:839
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:779
_pi_result
Definition: pi.h:260
pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, const void *pattern, size_t patternSize, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Fill API.
Definition: pi_cuda.cpp:947
pi_uint32 pi_bool
Definition: pi.h:251
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_cuda.cpp:976
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
Definition: pi.h:836
_pi_mem_advice
Definition: pi.h:661
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:831
pi_bitfield pi_mem_properties
Definition: pi.h:845
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:716
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1150
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_cuda.cpp:227
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_cuda.cpp:966
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable.
Definition: pi_cuda.cpp:1035
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:699
_pi_usm_migration_flags
Definition: pi.h:2143
_pi_image_copy_flags
Definition: pi.h:738
const pi_bool PI_FALSE
Definition: pi.h:747
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1181
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1172
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
Definition: pi_cuda.cpp:996
pi_result piextCommandBufferMemBufferFill(pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer fill command to the command-buffer.
Definition: pi_cuda.cpp:1216
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:742
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:768
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:754
@ PI_BUFFER_CREATE_TYPE_REGION
Definition: pi.h:744
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:815
uint32_t pi_uint32
Definition: pi.h:249
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:632
pi_bitfield pi_mem_flags
Definition: pi.h:829
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_cuda.cpp:804
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D fill API.
Definition: pi_cuda.cpp:983
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1160
pi_result piMemRelease(pi_mem mem)
Definition: pi_cuda.cpp:225
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:834
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:841
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
Definition: pi.h:842
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:857
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
Definition: pi_cuda.cpp:1141
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:840
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:792
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_cuda.cpp:878
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1194
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:831
pi_result piextCommandBufferFillUSM(pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM fill command to the command-buffer.
Definition: pi_cuda.cpp:1226
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:843
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:555
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
Definition: pi_cuda.cpp:210
pi_result piextCommandBufferAdviseUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t length, pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Advise command to the command-buffer.
Definition: pi_cuda.cpp:1246
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
Definition: pi_cuda.cpp:1008
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
Definition: pi.h:847
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:866
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_cuda.cpp:955
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:728
pi_result piextMemImageCopy(pi_queue command_queue, void *dst_ptr, const void *src_ptr, const pi_image_format *image_format, const pi_image_desc *image_desc, const pi_image_copy_flags flags, pi_image_offset src_offset, pi_image_offset dst_offset, pi_image_region copy_extent, pi_image_region host_extent, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to copy image data Host to Device or Device to Host.
Definition: pi_cuda.cpp:426
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:124
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
Definition: pi_cuda.cpp:1044
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1203
pi_result piextCommandBufferPrefetchUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Prefetch command to the command-buffer.
Definition: pi_cuda.cpp:1237
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:649
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL
Definition: pi.h:846
DeviceGlobalUSMMem & getOrAllocateDeviceGlobalUSM(const std::shared_ptr< queue_impl > &QueueImpl)
std::unordered_set< RTDeviceBinaryImage * > MImages
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
sycl::detail::pi::PiEvent GetEvent()
Definition: pi_utils.hpp:55