DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_manager.cpp
Go to the documentation of this file.
1 //==-------------- memory_manager.cpp --------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include <detail/event_impl.hpp>
14 #include <detail/pi_utils.hpp>
15 #include <detail/queue_impl.hpp>
16 #include <detail/xpti_registry.hpp>
17 
18 #include <sycl/usm/usm_enums.hpp>
20 
21 #include <algorithm>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 #ifdef XPTI_ENABLE_INSTRUMENTATION
27 #include <xpti/xpti_data_types.h>
28 #include <xpti/xpti_trace_framework.hpp>
29 #endif
30 
31 namespace sycl {
32 inline namespace _V1 {
33 namespace detail {
34 
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 uint8_t GMemAllocStreamID;
37 xpti::trace_event_data_t *GMemAllocEvent;
38 #endif
39 
40 uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize,
41  size_t GuardZone) {
42  (void)ObjHandle;
43  (void)AllocSize;
44  (void)GuardZone;
45  uint64_t CorrelationID = 0;
46 #ifdef XPTI_ENABLE_INSTRUMENTATION
47  constexpr uint16_t NotificationTraceType =
48  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_begin);
49  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
50  xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 /* alloc ptr */, AllocSize,
51  GuardZone};
52 
53  CorrelationID = xptiGetUniqueId();
54  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
55  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
56  }
57 #endif
58  return CorrelationID;
59 }
60 
61 void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
62  size_t AllocSize, size_t GuardZone,
63  uint64_t CorrelationID) {
64  (void)ObjHandle;
65  (void)AllocPtr;
66  (void)AllocSize;
67  (void)GuardZone;
68  (void)CorrelationID;
69 #ifdef XPTI_ENABLE_INSTRUMENTATION
70  constexpr uint16_t NotificationTraceType =
71  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_end);
72  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
73  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
74 
75  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
76  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
77  }
78 #endif
79 }
80 
81 uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr) {
82  (void)ObjHandle;
83  (void)AllocPtr;
84  uint64_t CorrelationID = 0;
85 #ifdef XPTI_ENABLE_INSTRUMENTATION
86  constexpr uint16_t NotificationTraceType =
87  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_begin);
88  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
89  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
90  0 /* guard zone */};
91 
92  CorrelationID = xptiGetUniqueId();
93  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
94  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
95  }
96 #endif
97  return CorrelationID;
98 }
99 
100 void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
101  uint64_t CorrelationID) {
102  (void)ObjHandle;
103  (void)AllocPtr;
104  (void)CorrelationID;
105 #ifdef XPTI_ENABLE_INSTRUMENTATION
106  constexpr uint16_t NotificationTraceType =
107  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end);
108  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
109  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
110  0 /* guard zone */};
111 
112  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
113  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
114  }
115 #endif
116 }
117 
118 static void waitForEvents(const std::vector<EventImplPtr> &Events) {
119  // Assuming all events will be on the same device or
120  // devices associated with the same Backend.
121  if (!Events.empty()) {
122  const PluginPtr &Plugin = Events[0]->getPlugin();
123  std::vector<sycl::detail::pi::PiEvent> PiEvents(Events.size());
124  std::transform(Events.begin(), Events.end(), PiEvents.begin(),
125  [](const EventImplPtr &EventImpl) {
126  return EventImpl->getHandleRef();
127  });
128  Plugin->call<PiApiKind::piEventsWait>(PiEvents.size(), &PiEvents[0]);
129  }
130 }
131 
133  pi_mem_flags Flags, size_t Size, void *HostPtr,
134  pi_mem *RetMem, const pi_mem_properties *Props) {
135 #ifdef XPTI_ENABLE_INSTRUMENTATION
136  uint64_t CorrID = 0;
137 #endif
138  // We only want to instrument piMemBufferCreate
139  {
140 #ifdef XPTI_ENABLE_INSTRUMENTATION
141  CorrID =
142  emitMemAllocBeginTrace(0 /* mem object */, Size, 0 /* guard zone */);
143  xpti::utils::finally _{[&] {
144  // C-style cast is required for MSVC
145  uintptr_t MemObjID = (uintptr_t)(*RetMem);
146  pi_native_handle Ptr = 0;
147  // Always use call_nocheck here, because call may throw an exception,
148  // and this lambda will be called from destructor, which in combination
149  // rewards us with UB.
150  // When doing buffer interop we don't know what device the memory should
151  // be resident on, so pass nullptr for Device param. Buffer interop may
152  // not be supported by all backends.
153  Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(
154  *RetMem, /*Dev*/ nullptr, &Ptr);
155  emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
156  CorrID);
157  }};
158 #endif
159  if (Size)
160  Plugin->call<PiApiKind::piMemBufferCreate>(Ctx, Flags, Size, HostPtr,
161  RetMem, Props);
162  }
163 }
164 
165 void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) {
166  // FIXME piMemRelease does not guarante memory release. It is only true if
167  // reference counter is 1. However, SYCL runtime currently only calls
168  // piMemRetain only for OpenCL interop
169 #ifdef XPTI_ENABLE_INSTRUMENTATION
170  uint64_t CorrID = 0;
171  // C-style cast is required for MSVC
172  uintptr_t MemObjID = (uintptr_t)(Mem);
173  uintptr_t Ptr = 0;
174  // Do not make unnecessary PI calls without instrumentation enabled
175  if (xptiTraceEnabled()) {
176  pi_native_handle PtrHandle = 0;
177  // When doing buffer interop we don't know what device the memory should be
178  // resident on, so pass nullptr for Device param. Buffer interop may not be
179  // supported by all backends.
180  Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, /*Dev*/ nullptr,
181  &PtrHandle);
182  Ptr = (uintptr_t)(PtrHandle);
183  }
184 #endif
185  // We only want to instrument piMemRelease
186  {
187 #ifdef XPTI_ENABLE_INSTRUMENTATION
188  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
189  xpti::utils::finally _{
190  [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }};
191 #endif
192  Plugin->call<PiApiKind::piMemRelease>(Mem);
193  }
194 }
195 
196 void memBufferMapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Buffer,
197  pi_bool Blocking, pi_map_flags Flags, size_t Offset,
198  size_t Size, pi_uint32 NumEvents,
199  const pi_event *WaitList, pi_event *Event,
200  void **RetMap) {
201 #ifdef XPTI_ENABLE_INSTRUMENTATION
202  uint64_t CorrID = 0;
203  uintptr_t MemObjID = (uintptr_t)(Buffer);
204 #endif
205  // We only want to instrument piEnqueueMemBufferMap
206 
207 #ifdef XPTI_ENABLE_INSTRUMENTATION
208  CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */);
209  xpti::utils::finally _{[&] {
210  emitMemAllocEndTrace(MemObjID, (uintptr_t)(*RetMap), Size,
211  0 /* guard zone */, CorrID);
212  }};
213 #endif
214  Plugin->call<PiApiKind::piEnqueueMemBufferMap>(Queue, Buffer, Blocking, Flags,
215  Offset, Size, NumEvents,
216  WaitList, Event, RetMap);
217 }
218 
219 void memUnmapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Mem,
220  void *MappedPtr, pi_uint32 NumEvents,
221  const pi_event *WaitList, pi_event *Event) {
222 #ifdef XPTI_ENABLE_INSTRUMENTATION
223  uint64_t CorrID = 0;
224  uintptr_t MemObjID = (uintptr_t)(Mem);
225  uintptr_t Ptr = (uintptr_t)(MappedPtr);
226 #endif
227  // We only want to instrument piEnqueueMemUnmap
228  {
229 #ifdef XPTI_ENABLE_INSTRUMENTATION
230  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
231  xpti::utils::finally _{[&] {
232  // There's no way for SYCL to know, when the pointer is freed, so we have
233  // to explicitly wait for the end of data transfers here in order to
234  // report correct events.
235  // Always use call_nocheck here, because call may throw an exception,
236  // and this lambda will be called from destructor, which in combination
237  // rewards us with UB.
238  Plugin->call_nocheck<PiApiKind::piEventsWait>(1, Event);
239  emitMemReleaseEndTrace(MemObjID, Ptr, CorrID);
240  }};
241 #endif
242  Plugin->call<PiApiKind::piEnqueueMemUnmap>(Queue, Mem, MappedPtr, NumEvents,
243  WaitList, Event);
244  }
245 }
246 
248  void *MemAllocation,
249  std::vector<EventImplPtr> DepEvents,
250  sycl::detail::pi::PiEvent &OutEvent) {
251  // There is no async API for memory releasing. Explicitly wait for all
252  // dependency events and return empty event.
253  waitForEvents(DepEvents);
254  OutEvent = nullptr;
255  XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
256  MemObj->releaseMem(TargetContext, MemAllocation);
257 }
258 
260  SYCLMemObjI *MemObj, void *MemAllocation,
261  void *UserPtr) {
262  if (UserPtr == MemAllocation) {
263  // Do nothing as it's user provided memory.
264  return;
265  }
266 
267  if (TargetContext->is_host()) {
268  MemObj->releaseHostMem(MemAllocation);
269  return;
270  }
271 
272  const PluginPtr &Plugin = TargetContext->getPlugin();
273  memReleaseHelper(Plugin, pi::cast<sycl::detail::pi::PiMem>(MemAllocation));
274 }
275 
277  bool InitFromUserData, void *HostPtr,
278  std::vector<EventImplPtr> DepEvents,
279  sycl::detail::pi::PiEvent &OutEvent) {
280  // There is no async API for memory allocation. Explicitly wait for all
281  // dependency events and return empty event.
282  waitForEvents(DepEvents);
283  OutEvent = nullptr;
284 
285  return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
286  OutEvent);
287 }
288 
289 void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
290  bool HostPtrReadOnly, size_t Size,
291  const sycl::property_list &) {
292  std::ignore = HostPtrReadOnly;
293  std::ignore = Size;
294 
295  // Can return user pointer directly if it is not a nullptr.
296  if (UserPtr)
297  return UserPtr;
298 
299  return MemObj->allocateHostMem();
300  ;
301 }
302 
304  ContextImplPtr TargetContext, void *UserPtr,
305  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
306  const sycl::property_list &, sycl::detail::pi::PiEvent &OutEventToWait) {
307  (void)TargetContext;
308  (void)InteropContext;
309  // If memory object is created with interop c'tor return cl_mem as is.
310  assert(TargetContext == InteropContext && "Expected matching contexts");
311  OutEventToWait = InteropEvent->getHandleRef();
312  // Retain the event since it will be released during alloca command
313  // destruction
314  if (nullptr != OutEventToWait) {
315  const PluginPtr &Plugin = InteropEvent->getPlugin();
316  Plugin->call<PiApiKind::piEventRetain>(OutEventToWait);
317  }
318  return UserPtr;
319 }
320 
322 getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly) {
323  // Create read_write mem object to handle arbitrary uses.
326  if (UserPtr)
327  Result |= PI_MEM_FLAGS_HOST_PTR_USE;
328  return Result;
329 }
330 
332  ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly,
335  const sycl::property_list &) {
336  sycl::detail::pi::PiMemFlags CreationFlags =
337  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
338 
340  const PluginPtr &Plugin = TargetContext->getPlugin();
341  Plugin->call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(),
342  CreationFlags, &Format, &Desc,
343  UserPtr, &NewMem);
344  return NewMem;
345 }
346 
347 void *
349  bool HostPtrReadOnly, const size_t Size,
350  const sycl::property_list &PropsList) {
351  sycl::detail::pi::PiMemFlags CreationFlags =
352  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
353  if (PropsList.has_property<
355  CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC;
356 
357  sycl::detail::pi::PiMem NewMem = nullptr;
358  const PluginPtr &Plugin = TargetContext->getPlugin();
359 
360  std::vector<pi_mem_properties> AllocProps;
361 
363  TargetContext->isBufferLocationSupported()) {
364  auto Location =
366  .get_buffer_location();
367  AllocProps.reserve(AllocProps.size() + 2);
368  AllocProps.push_back(PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION);
369  AllocProps.push_back(Location);
370  }
371 
372  if (PropsList.has_property<property::buffer::mem_channel>()) {
373  auto Channel =
374  PropsList.get_property<property::buffer::mem_channel>().get_channel();
375  AllocProps.reserve(AllocProps.size() + 2);
376  AllocProps.push_back(PI_MEM_PROPERTIES_CHANNEL);
377  AllocProps.push_back(Channel);
378  }
379 
380  pi_mem_properties *AllocPropsPtr = nullptr;
381  if (!AllocProps.empty()) {
382  // If there are allocation properties, push an end to the list and update
383  // the properties pointer.
384  AllocProps.push_back(0);
385  AllocPropsPtr = AllocProps.data();
386  }
387 
388  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags,
389  Size, UserPtr, &NewMem, AllocPropsPtr);
390  return NewMem;
391 }
392 
394  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
395  bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent,
396  const ContextImplPtr &InteropContext, const sycl::property_list &PropsList,
397  sycl::detail::pi::PiEvent &OutEventToWait) {
398  void *MemPtr;
399  if (TargetContext->is_host())
400  MemPtr =
401  allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
402  else if (UserPtr && InteropContext)
403  MemPtr =
404  allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
405  InteropContext, PropsList, OutEventToWait);
406  else
407  MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
408  PropsList);
410  return MemPtr;
411 }
412 
414  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
415  bool HostPtrReadOnly, size_t Size,
418  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
419  const sycl::property_list &PropsList,
420  sycl::detail::pi::PiEvent &OutEventToWait) {
421  if (TargetContext->is_host())
422  return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
423  PropsList);
424  if (UserPtr && InteropContext)
425  return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
426  InteropContext, PropsList, OutEventToWait);
427  return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
428  Format, PropsList);
429 }
430 
432  void *ParentMemObj, size_t ElemSize,
433  size_t Offset, range<3> Range,
434  std::vector<EventImplPtr> DepEvents,
435  sycl::detail::pi::PiEvent &OutEvent) {
436  waitForEvents(DepEvents);
437  OutEvent = nullptr;
438 
439  if (TargetContext->is_host())
440  return static_cast<void *>(static_cast<char *>(ParentMemObj) + Offset);
441 
442  size_t SizeInBytes = ElemSize;
443  for (size_t I = 0; I < 3; ++I)
444  SizeInBytes *= Range[I];
445 
446  sycl::detail::pi::PiResult Error = PI_SUCCESS;
447  pi_buffer_region_struct Region{Offset, SizeInBytes};
449  const PluginPtr &Plugin = TargetContext->getPlugin();
450  Error = Plugin->call_nocheck<PiApiKind::piMemBufferPartition>(
451  pi::cast<sycl::detail::pi::PiMem>(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW,
452  PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem);
453  if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
454  throw invalid_object_error(
455  "Specified offset of the sub-buffer being constructed is not a "
456  "multiple of the memory base address alignment",
457  PI_ERROR_INVALID_VALUE);
458 
459  if (Error != PI_SUCCESS) {
460  Plugin->reportPiError(Error, "allocateMemSubBuffer()");
461  }
462 
463  return NewMem;
464 }
465 
467  int XTerm;
468  int YTerm;
469  int ZTerm;
470 };
473  // For buffers, the offsets/ranges coming from accessor are always
474  // id<3>/range<3> But their organization varies by dimension:
475  // 1 ==> {width, 1, 1}
476  // 2 ==> {height, width, 1}
477  // 3 ==> {depth, height, width}
478  // Some callers schedule 0 as DimDst/DimSrc.
479 
480  if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
481  if (Dimensions == 3) {
482  pos.XTerm = 2, pos.YTerm = 1, pos.ZTerm = 0;
483  } else if (Dimensions == 2) {
484  pos.XTerm = 1, pos.YTerm = 0, pos.ZTerm = 2;
485  } else { // Dimension is 1 or 0
486  pos.XTerm = 0, pos.YTerm = 1, pos.ZTerm = 2;
487  }
488  } else { // While range<>/id<> use by images is different than buffers, it's
489  // consistent with their accessors.
490  pos.XTerm = 0;
491  pos.YTerm = 1;
492  pos.ZTerm = 2;
493  }
494 }
495 
496 void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
497  unsigned int DimSrc, sycl::range<3> SrcSize,
498  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
499  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
500  QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize,
501  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
502  unsigned int DstElemSize,
503  std::vector<sycl::detail::pi::PiEvent> DepEvents,
504  sycl::detail::pi::PiEvent &OutEvent,
505  const detail::EventImplPtr &OutEventImpl) {
506  (void)SrcAccessRange;
507  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
508 
509  const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef();
510  const PluginPtr &Plugin = TgtQueue->getPlugin();
511 
512  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
513  TermPositions SrcPos, DstPos;
514  prepTermPositions(SrcPos, DimSrc, MemType);
515  prepTermPositions(DstPos, DimDst, MemType);
516 
517  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
518  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
519  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
520  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
521  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
522 
523  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
524  if (1 == DimDst && 1 == DimSrc) {
525  if (OutEventImpl != nullptr)
526  OutEventImpl->setHostEnqueueTime();
528  Queue, DstMem,
529  /*blocking_write=*/PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
530  SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
531  } else {
532  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
533  size_t BufferSlicePitch =
534  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
535  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
536  size_t HostSlicePitch =
537  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
538 
539  pi_buff_rect_offset_struct BufferOffset{
540  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
541  pi_buff_rect_offset_struct HostOffset{
542  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
543  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
544  DstAccessRange[DstPos.YTerm],
545  DstAccessRange[DstPos.ZTerm]};
546  if (OutEventImpl != nullptr)
547  OutEventImpl->setHostEnqueueTime();
549  Queue, DstMem,
550  /*blocking_write=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
551  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
552  SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
553  }
554  } else {
555  size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
556  size_t InputSlicePitch =
557  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
558 
559  pi_image_offset_struct Origin{DstOffset[DstPos.XTerm],
560  DstOffset[DstPos.YTerm],
561  DstOffset[DstPos.ZTerm]};
562  pi_image_region_struct Region{DstAccessRange[DstPos.XTerm],
563  DstAccessRange[DstPos.YTerm],
564  DstAccessRange[DstPos.ZTerm]};
565  if (OutEventImpl != nullptr)
566  OutEventImpl->setHostEnqueueTime();
567  Plugin->call<PiApiKind::piEnqueueMemImageWrite>(
568  Queue, DstMem,
569  /*blocking_write=*/PI_FALSE, &Origin, &Region, InputRowPitch,
570  InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
571  }
572 }
573 
574 void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
575  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
576  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
577  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
578  unsigned int DimDst, sycl::range<3> DstSize,
579  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
580  unsigned int DstElemSize,
581  std::vector<sycl::detail::pi::PiEvent> DepEvents,
582  sycl::detail::pi::PiEvent &OutEvent,
583  const detail::EventImplPtr &OutEventImpl) {
584  (void)DstAccessRange;
585  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
586 
587  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
588  const PluginPtr &Plugin = SrcQueue->getPlugin();
589 
590  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
591  TermPositions SrcPos, DstPos;
592  prepTermPositions(SrcPos, DimSrc, MemType);
593  prepTermPositions(DstPos, DimDst, MemType);
594 
595  // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
596  // copyD2D) will usually have the same values for AccessRange, Size,
597  // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
598  // Essentially, it schedules a copyBack of chars thus in copyD2H the
599  // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
600  // sized to bytes with a DstElemSize of 1.
601  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
602  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
603  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
604  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
605  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
606 
607  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
608  if (1 == DimDst && 1 == DimSrc) {
609  if (OutEventImpl != nullptr)
610  OutEventImpl->setHostEnqueueTime();
611  Plugin->call<PiApiKind::piEnqueueMemBufferRead>(
612  Queue, SrcMem,
613  /*blocking_read=*/PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
614  DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
615  } else {
616  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
617  size_t BufferSlicePitch =
618  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
619  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
620  size_t HostSlicePitch =
621  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
622 
623  pi_buff_rect_offset_struct BufferOffset{
624  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
625  pi_buff_rect_offset_struct HostOffset{
626  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
627  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
628  SrcAccessRange[SrcPos.YTerm],
629  SrcAccessRange[SrcPos.ZTerm]};
630  if (OutEventImpl != nullptr)
631  OutEventImpl->setHostEnqueueTime();
633  Queue, SrcMem,
634  /*blocking_read=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
635  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
636  DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
637  }
638  } else {
639  size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
640  size_t SlicePitch =
641  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
642 
643  pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm],
644  SrcOffset[SrcPos.YTerm],
645  SrcOffset[SrcPos.ZTerm]};
646  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
647  SrcAccessRange[SrcPos.YTerm],
648  SrcAccessRange[SrcPos.ZTerm]};
649  if (OutEventImpl != nullptr)
650  OutEventImpl->setHostEnqueueTime();
651  Plugin->call<PiApiKind::piEnqueueMemImageRead>(
652  Queue, SrcMem, PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
653  DepEvents.size(), DepEvents.data(), &OutEvent);
654  }
655 }
656 
657 void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
658  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
659  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
660  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
661  QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize,
662  sycl::range<3>, sycl::id<3> DstOffset, unsigned int DstElemSize,
663  std::vector<sycl::detail::pi::PiEvent> DepEvents,
664  sycl::detail::pi::PiEvent &OutEvent,
665  const detail::EventImplPtr &OutEventImpl) {
666  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
667 
668  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
669  const PluginPtr &Plugin = SrcQueue->getPlugin();
670 
671  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
672  TermPositions SrcPos, DstPos;
673  prepTermPositions(SrcPos, DimSrc, MemType);
674  prepTermPositions(DstPos, DimDst, MemType);
675 
676  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
677  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
678  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
679  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
680  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
681 
682  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
683  if (1 == DimDst && 1 == DimSrc) {
684  if (OutEventImpl != nullptr)
685  OutEventImpl->setHostEnqueueTime();
686  Plugin->call<PiApiKind::piEnqueueMemBufferCopy>(
687  Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
688  SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
689  &OutEvent);
690  } else {
691  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
692  // calculate both src and dest pitch using region[0], which is not correct
693  // if src and dest are not the same size.
694  size_t SrcRowPitch = SrcSzWidthBytes;
695  size_t SrcSlicePitch = (DimSrc <= 1)
696  ? SrcSzWidthBytes
697  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
698  size_t DstRowPitch = DstSzWidthBytes;
699  size_t DstSlicePitch = (DimDst <= 1)
700  ? DstSzWidthBytes
701  : DstSzWidthBytes * DstSize[DstPos.YTerm];
702 
703  pi_buff_rect_offset_struct SrcOrigin{
704  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
705  pi_buff_rect_offset_struct DstOrigin{
706  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
707  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
708  SrcAccessRange[SrcPos.YTerm],
709  SrcAccessRange[SrcPos.ZTerm]};
710  if (OutEventImpl != nullptr)
711  OutEventImpl->setHostEnqueueTime();
713  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
714  SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
715  DepEvents.data(), &OutEvent);
716  }
717  } else {
718  pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm],
719  SrcOffset[SrcPos.YTerm],
720  SrcOffset[SrcPos.ZTerm]};
721  pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm],
722  DstOffset[DstPos.YTerm],
723  DstOffset[DstPos.ZTerm]};
724  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
725  SrcAccessRange[SrcPos.YTerm],
726  SrcAccessRange[SrcPos.ZTerm]};
727  if (OutEventImpl != nullptr)
728  OutEventImpl->setHostEnqueueTime();
729  Plugin->call<PiApiKind::piEnqueueMemImageCopy>(
730  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
731  DepEvents.size(), DepEvents.data(), &OutEvent);
732  }
733 }
734 
735 static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr,
736  unsigned int DimSrc, sycl::range<3> SrcSize,
737  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
738  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
739  unsigned int DimDst, sycl::range<3> DstSize,
740  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
741  unsigned int DstElemSize,
742  std::vector<sycl::detail::pi::PiEvent>,
744  if ((DimSrc != 1 || DimDst != 1) &&
745  (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} ||
746  SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
747  throw runtime_error("Not supported configuration of memcpy requested",
748  PI_ERROR_INVALID_OPERATION);
749  }
750 
751  SrcMem += SrcOffset[0] * SrcElemSize;
752  DstMem += DstOffset[0] * DstElemSize;
753 
754  if (SrcMem == DstMem)
755  return;
756 
757  size_t BytesToCopy =
758  SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
759  std::memcpy(DstMem, SrcMem, BytesToCopy);
760 }
761 
762 // Copies memory between: host and device, host and host,
763 // device and device if memory objects bound to the one context.
764 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
765  QueueImplPtr SrcQueue, unsigned int DimSrc,
766  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
767  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
768  void *DstMem, QueueImplPtr TgtQueue,
769  unsigned int DimDst, sycl::range<3> DstSize,
770  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
771  unsigned int DstElemSize,
772  std::vector<sycl::detail::pi::PiEvent> DepEvents,
773  sycl::detail::pi::PiEvent &OutEvent,
774  const detail::EventImplPtr &OutEventImpl) {
775 
776  if (SrcQueue->is_host()) {
777  if (TgtQueue->is_host())
778  copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
779  SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem,
780  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
781  DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
782  else
783  copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
784  SrcAccessRange, SrcOffset, SrcElemSize,
785  pi::cast<sycl::detail::pi::PiMem>(DstMem), std::move(TgtQueue),
786  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
787  std::move(DepEvents), OutEvent, OutEventImpl);
788  } else {
789  if (TgtQueue->is_host())
790  copyD2H(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
791  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
792  SrcElemSize, (char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
793  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
794  OutEvent, OutEventImpl);
795  else
796  copyD2D(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
797  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
798  SrcElemSize, pi::cast<sycl::detail::pi::PiMem>(DstMem),
799  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
800  DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
801  }
802 }
803 
804 // TODO: This function will remain until ABI-breaking change
805 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
806  QueueImplPtr SrcQueue, unsigned int DimSrc,
807  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
808  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
809  void *DstMem, QueueImplPtr TgtQueue,
810  unsigned int DimDst, sycl::range<3> DstSize,
811  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
812  unsigned int DstElemSize,
813  std::vector<sycl::detail::pi::PiEvent> DepEvents,
814  sycl::detail::pi::PiEvent &OutEvent) {
815  MemoryManager::copy(SYCLMemObj, SrcMem, SrcQueue, DimSrc, SrcSize,
816  SrcAccessRange, SrcOffset, SrcElemSize, DstMem, TgtQueue,
817  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
818  DepEvents, OutEvent, nullptr);
819 }
820 
821 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
822  size_t PatternSize, const char *Pattern,
823  unsigned int Dim, sycl::range<3> MemRange,
824  sycl::range<3> AccRange, sycl::id<3> Offset,
825  unsigned int ElementSize,
826  std::vector<sycl::detail::pi::PiEvent> DepEvents,
827  sycl::detail::pi::PiEvent &OutEvent,
828  const detail::EventImplPtr &OutEventImpl) {
829  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
830 
831  const PluginPtr &Plugin = Queue->getPlugin();
832 
833  if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
834  if (OutEventImpl != nullptr)
835  OutEventImpl->setHostEnqueueTime();
836 
837  // 2D and 3D buffers accessors can't have custom range or the data will
838  // likely be discontiguous.
839  bool RangesUsable = (Dim <= 1) || (MemRange == AccRange);
840  // For 2D and 3D buffers, the offset must be 0, or the data will be
841  // discontiguous.
842  bool OffsetUsable = (Dim <= 1) || (Offset == sycl::id<3>{0, 0, 0});
843  size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2];
844 
845  if (RangesUsable && OffsetUsable) {
846  Plugin->call<PiApiKind::piEnqueueMemBufferFill>(
847  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem),
848  Pattern, PatternSize, Offset[0] * ElementSize,
849  RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(),
850  &OutEvent);
851  return;
852  }
853  // The sycl::handler uses a parallel_for kernel in the case of unusable
854  // Range or Offset, not CG:Fill. So we should not be here.
855  throw runtime_error("Not supported configuration of fill requested",
856  PI_ERROR_INVALID_OPERATION);
857  } else {
858  if (OutEventImpl != nullptr)
859  OutEventImpl->setHostEnqueueTime();
860  // images don't support offset accessors and thus avoid issues of
861  // discontinguous data
862  Plugin->call<PiApiKind::piEnqueueMemImageFill>(
863  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
864  &Offset[0], &AccRange[0], DepEvents.size(), DepEvents.data(),
865  &OutEvent);
866  }
867 }
868 
869 // TODO: This function will remain until ABI-breaking change
870 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
871  size_t PatternSize, const char *Pattern,
872  unsigned int Dim, sycl::range<3> Size,
873  sycl::range<3> Range, sycl::id<3> Offset,
874  unsigned int ElementSize,
875  std::vector<sycl::detail::pi::PiEvent> DepEvents,
876  sycl::detail::pi::PiEvent &OutEvent) {
877  MemoryManager::fill(SYCLMemObj, Mem, Queue, PatternSize, Pattern, Dim, Size,
878  Range, Offset, ElementSize, DepEvents, OutEvent, nullptr);
879 }
880 
881 void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
882  access::mode AccessMode, unsigned int, sycl::range<3>,
883  sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
884  unsigned int ElementSize,
885  std::vector<sycl::detail::pi::PiEvent> DepEvents,
886  sycl::detail::pi::PiEvent &OutEvent) {
887  if (Queue->is_host()) {
888  throw runtime_error("Not supported configuration of map requested",
889  PI_ERROR_INVALID_OPERATION);
890  }
891 
892  pi_map_flags Flags = 0;
893 
894  switch (AccessMode) {
895  case access::mode::read:
896  Flags |= PI_MAP_READ;
897  break;
898  case access::mode::write:
899  Flags |= PI_MAP_WRITE;
900  break;
902  case access::mode::atomic:
903  Flags = PI_MAP_WRITE | PI_MAP_READ;
904  break;
905  case access::mode::discard_write:
906  case access::mode::discard_read_write:
908  break;
909  }
910 
911  AccessOffset[0] *= ElementSize;
912  AccessRange[0] *= ElementSize;
913 
914  // TODO: Handle offset
915  assert(AccessOffset[0] == 0 && "Handle offset");
916 
917  void *MappedPtr = nullptr;
918  const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
919  const PluginPtr &Plugin = Queue->getPlugin();
920  memBufferMapHelper(Plugin, Queue->getHandleRef(),
921  pi::cast<sycl::detail::pi::PiMem>(Mem), PI_FALSE, Flags,
922  AccessOffset[0], BytesToMap, DepEvents.size(),
923  DepEvents.data(), &OutEvent, &MappedPtr);
924  return MappedPtr;
925 }
926 
927 void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
928  void *MappedPtr,
929  std::vector<sycl::detail::pi::PiEvent> DepEvents,
930  sycl::detail::pi::PiEvent &OutEvent) {
931 
932  // Host queue is not supported here.
933  // All DepEvents are to the same Context.
934  // Using the plugin of the Queue.
935 
936  const PluginPtr &Plugin = Queue->getPlugin();
937  memUnmapHelper(Plugin, Queue->getHandleRef(),
938  pi::cast<sycl::detail::pi::PiMem>(Mem), MappedPtr,
939  DepEvents.size(), DepEvents.data(), &OutEvent);
940 }
941 
942 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
943  size_t Len, void *DstMem,
944  std::vector<sycl::detail::pi::PiEvent> DepEvents,
945  sycl::detail::pi::PiEvent *OutEvent,
946  const detail::EventImplPtr &OutEventImpl) {
947  assert(!SrcQueue->getContextImplPtr()->is_host() &&
948  "Host queue not supported in fill_usm.");
949 
950  if (!Len) { // no-op, but ensure DepEvents will still be waited on
951  if (!DepEvents.empty()) {
952  if (OutEventImpl != nullptr)
953  OutEventImpl->setHostEnqueueTime();
954  SrcQueue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
955  SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
956  OutEvent);
957  }
958  return;
959  }
960 
961  if (!SrcMem || !DstMem)
962  throw runtime_error("NULL pointer argument in memory copy operation.",
963  PI_ERROR_INVALID_VALUE);
964 
965  const PluginPtr &Plugin = SrcQueue->getPlugin();
966  if (OutEventImpl != nullptr)
967  OutEventImpl->setHostEnqueueTime();
968  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
969  SrcQueue->getHandleRef(),
970  /* blocking */ PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(),
971  DepEvents.data(), OutEvent);
972 }
973 
974 // TODO: This function will remain until ABI-breaking change
975 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
976  size_t Len, void *DstMem,
977  std::vector<sycl::detail::pi::PiEvent> DepEvents,
978  sycl::detail::pi::PiEvent *OutEvent) {
979  MemoryManager::copy_usm(SrcMem, SrcQueue, Len, DstMem, DepEvents, OutEvent,
980  nullptr);
981 }
982 
983 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
984  int Pattern,
985  std::vector<sycl::detail::pi::PiEvent> DepEvents,
986  sycl::detail::pi::PiEvent *OutEvent,
987  const detail::EventImplPtr &OutEventImpl) {
988  assert(!Queue->getContextImplPtr()->is_host() &&
989  "Host queue not supported in fill_usm.");
990 
991  if (!Length) { // no-op, but ensure DepEvents will still be waited on
992  if (!DepEvents.empty()) {
993  if (OutEventImpl != nullptr)
994  OutEventImpl->setHostEnqueueTime();
995  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
996  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
997  }
998  return;
999  }
1000 
1001  if (!Mem)
1002  throw runtime_error("NULL pointer argument in memory fill operation.",
1003  PI_ERROR_INVALID_VALUE);
1004  if (OutEventImpl != nullptr)
1005  OutEventImpl->setHostEnqueueTime();
1006  const PluginPtr &Plugin = Queue->getPlugin();
1007  Plugin->call<PiApiKind::piextUSMEnqueueMemset>(
1008  Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
1009  DepEvents.data(), OutEvent);
1010 }
1011 
1012 // TODO: This function will remain until ABI-breaking change
1013 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
1014  int Pattern,
1015  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1016  sycl::detail::pi::PiEvent *OutEvent) {
1017  MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent,
1018  nullptr); // OutEventImpl);
1019 }
1020 
1021 void MemoryManager::prefetch_usm(
1022  void *Mem, QueueImplPtr Queue, size_t Length,
1023  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1024  sycl::detail::pi::PiEvent *OutEvent,
1025  const detail::EventImplPtr &OutEventImpl) {
1026  assert(!Queue->getContextImplPtr()->is_host() &&
1027  "Host queue not supported in prefetch_usm.");
1028 
1029  const PluginPtr &Plugin = Queue->getPlugin();
1030  if (OutEventImpl != nullptr)
1031  OutEventImpl->setHostEnqueueTime();
1032  Plugin->call<PiApiKind::piextUSMEnqueuePrefetch>(
1033  Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0),
1034  DepEvents.size(), DepEvents.data(), OutEvent);
1035 }
1036 
1037 // TODO: This function will remain until ABI-breaking change
1038 void MemoryManager::prefetch_usm(
1039  void *Mem, QueueImplPtr Queue, size_t Length,
1040  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1041  sycl::detail::pi::PiEvent *OutEvent) {
1042  MemoryManager::prefetch_usm(Mem, Queue, Length, DepEvents, OutEvent, nullptr);
1043 }
1044 
1045 void MemoryManager::advise_usm(
1046  const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice,
1047  std::vector<sycl::detail::pi::PiEvent> /*DepEvents*/,
1048  sycl::detail::pi::PiEvent *OutEvent,
1049  const detail::EventImplPtr &OutEventImpl) {
1050  assert(!Queue->getContextImplPtr()->is_host() &&
1051  "Host queue not supported in advise_usm.");
1052 
1053  const PluginPtr &Plugin = Queue->getPlugin();
1054  if (OutEventImpl != nullptr)
1055  OutEventImpl->setHostEnqueueTime();
1056  Plugin->call<PiApiKind::piextUSMEnqueueMemAdvise>(Queue->getHandleRef(), Mem,
1057  Length, Advice, OutEvent);
1058 }
1059 
1060 // TODO: This function will remain until ABI-breaking change
1061 void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue,
1062  size_t Length, pi_mem_advice Advice,
1063  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1064  sycl::detail::pi::PiEvent *OutEvent) {
1065  MemoryManager::advise_usm(Mem, Queue, Length, Advice, DepEvents, OutEvent,
1066  nullptr);
1067 }
1068 
1069 void MemoryManager::copy_2d_usm(
1070  const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem,
1071  size_t DstPitch, size_t Width, size_t Height,
1072  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1073  sycl::detail::pi::PiEvent *OutEvent,
1074  const detail::EventImplPtr &OutEventImpl) {
1075  assert(!Queue->getContextImplPtr()->is_host() &&
1076  "Host queue not supported in copy_2d_usm.");
1077 
1078  if (Width == 0 || Height == 0) {
1079  // no-op, but ensure DepEvents will still be waited on
1080  if (!DepEvents.empty()) {
1081  if (OutEventImpl != nullptr)
1082  OutEventImpl->setHostEnqueueTime();
1083  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1084  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1085  }
1086  return;
1087  }
1088 
1089  if (!DstMem || !SrcMem)
1090  throw sycl::exception(sycl::make_error_code(errc::invalid),
1091  "NULL pointer argument in 2D memory copy operation.");
1092 
1093  const PluginPtr &Plugin = Queue->getPlugin();
1094 
1095  pi_bool SupportsUSMMemcpy2D = false;
1097  Queue->getContextImplPtr()->getHandleRef(),
1099  &SupportsUSMMemcpy2D, nullptr);
1100 
1101  if (SupportsUSMMemcpy2D) {
1102  if (OutEventImpl != nullptr)
1103  OutEventImpl->setHostEnqueueTime();
1104  // Direct memcpy2D is supported so we use this function.
1105  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy2D>(
1106  Queue->getHandleRef(), /*blocking=*/PI_FALSE, DstMem, DstPitch, SrcMem,
1107  SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1108  return;
1109  }
1110 
1111  // Otherwise we allow the special case where the copy is to or from host.
1112 #ifndef NDEBUG
1113  context Ctx = createSyclObjFromImpl<context>(Queue->getContextImplPtr());
1114  usm::alloc SrcAllocType = get_pointer_type(SrcMem, Ctx);
1115  usm::alloc DstAllocType = get_pointer_type(DstMem, Ctx);
1116  bool SrcIsHost =
1117  SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
1118  bool DstIsHost =
1119  DstAllocType == usm::alloc::unknown || DstAllocType == usm::alloc::host;
1120  assert((SrcIsHost || DstIsHost) && "In fallback path for copy_2d_usm either "
1121  "source or destination must be on host.");
1122 #endif // NDEBUG
1123 
1124  // The fallback in this case is to insert a copy per row.
1125  std::vector<OwnedPiEvent> CopyEventsManaged;
1126  CopyEventsManaged.reserve(Height);
1127  // We'll need continuous range of events for a wait later as well.
1128  std::vector<sycl::detail::pi::PiEvent> CopyEvents(Height);
1129  if (OutEventImpl != nullptr)
1130  OutEventImpl->setHostEnqueueTime();
1131  for (size_t I = 0; I < Height; ++I) {
1132  char *DstItBegin = static_cast<char *>(DstMem) + I * DstPitch;
1133  const char *SrcItBegin = static_cast<const char *>(SrcMem) + I * SrcPitch;
1134  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
1135  Queue->getHandleRef(), /* blocking */ PI_FALSE, DstItBegin, SrcItBegin,
1136  Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I);
1137  CopyEventsManaged.emplace_back(CopyEvents[I], Plugin,
1138  /*TakeOwnership=*/true);
1139  }
1140  if (OutEventImpl != nullptr)
1141  OutEventImpl->setHostEnqueueTime();
1142  // Then insert a wait to coalesce the copy events.
1143  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1144  Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent);
1145 }
1146 
1147 // TODO: This function will remain until ABI-breaking change
1148 void MemoryManager::copy_2d_usm(
1149  const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem,
1150  size_t DstPitch, size_t Width, size_t Height,
1151  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1152  sycl::detail::pi::PiEvent *OutEvent) {
1153  MemoryManager::copy_2d_usm(SrcMem, SrcPitch, Queue, DstMem, DstPitch, Width,
1154  Height, DepEvents, OutEvent, nullptr);
1155 }
1156 
1157 void MemoryManager::fill_2d_usm(
1158  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1159  const std::vector<char> &Pattern,
1160  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1161  sycl::detail::pi::PiEvent *OutEvent,
1162  const detail::EventImplPtr &OutEventImpl) {
1163  assert(!Queue->getContextImplPtr()->is_host() &&
1164  "Host queue not supported in fill_2d_usm.");
1165 
1166  if (Width == 0 || Height == 0) {
1167  // no-op, but ensure DepEvents will still be waited on
1168  if (!DepEvents.empty()) {
1169  if (OutEventImpl != nullptr)
1170  OutEventImpl->setHostEnqueueTime();
1171  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1172  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1173  }
1174  return;
1175  }
1176 
1177  if (!DstMem)
1178  throw sycl::exception(sycl::make_error_code(errc::invalid),
1179  "NULL pointer argument in 2D memory fill operation.");
1180  if (OutEventImpl != nullptr)
1181  OutEventImpl->setHostEnqueueTime();
1182  const PluginPtr &Plugin = Queue->getPlugin();
1183  Plugin->call<PiApiKind::piextUSMEnqueueFill2D>(
1184  Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(),
1185  Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1186 }
1187 
1188 // TODO: This function will remain until ABI-breaking change
1189 void MemoryManager::fill_2d_usm(
1190  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1191  const std::vector<char> &Pattern,
1192  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1193  sycl::detail::pi::PiEvent *OutEvent) {
1194  MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, Pattern,
1195  DepEvents, OutEvent, nullptr);
1196 }
1197 
1198 void MemoryManager::memset_2d_usm(
1199  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1200  char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1201  sycl::detail::pi::PiEvent *OutEvent,
1202  const detail::EventImplPtr &OutEventImpl) {
1203  assert(!Queue->getContextImplPtr()->is_host() &&
1204  "Host queue not supported in fill_2d_usm.");
1205 
1206  if (Width == 0 || Height == 0) {
1207  // no-op, but ensure DepEvents will still be waited on
1208  if (!DepEvents.empty()) {
1209  if (OutEventImpl != nullptr)
1210  OutEventImpl->setHostEnqueueTime();
1211  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1212  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1213  }
1214  return;
1215  }
1216 
1217  if (!DstMem)
1218  throw sycl::exception(
1219  sycl::make_error_code(errc::invalid),
1220  "NULL pointer argument in 2D memory memset operation.");
1221  if (OutEventImpl != nullptr)
1222  OutEventImpl->setHostEnqueueTime();
1223  const PluginPtr &Plugin = Queue->getPlugin();
1224  Plugin->call<PiApiKind::piextUSMEnqueueMemset2D>(
1225  Queue->getHandleRef(), DstMem, Pitch, static_cast<int>(Value), Width,
1226  Height, DepEvents.size(), DepEvents.data(), OutEvent);
1227 }
1228 
1229 // TODO: This function will remain until ABI-breaking change
1230 void MemoryManager::memset_2d_usm(
1231  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1232  char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1233  sycl::detail::pi::PiEvent *OutEvent) {
1234  MemoryManager::memset_2d_usm(DstMem, Queue, Pitch, Width, Height, Value,
1235  DepEvents, OutEvent, nullptr);
1236 }
1237 
1238 static void
1240  DeviceGlobalMapEntry *DeviceGlobalEntry,
1241  size_t NumBytes, size_t Offset, const void *Src,
1242  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1243  sycl::detail::pi::PiEvent *OutEvent,
1244  const detail::EventImplPtr &OutEventImpl) {
1245  // Get or allocate USM memory for the device_global.
1246  DeviceGlobalUSMMem &DeviceGlobalUSM =
1247  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1248  void *Dest = DeviceGlobalUSM.getPtr();
1249 
1250  // OwnedPiEvent will keep the initialization event alive for the duration
1251  // of this function call.
1252  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1253 
1254  // We may need addtional events, so create a non-const dependency events list
1255  // to use if we need to modify it.
1256  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1257  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1258  ZIEvent ? AuxDepEventsStorage : DepEvents;
1259 
1260  // If there is a zero-initializer event the memory operation should wait for
1261  // it.
1262  if (ZIEvent) {
1263  AuxDepEventsStorage = DepEvents;
1264  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1265  }
1266 
1267  MemoryManager::copy_usm(Src, Queue, NumBytes,
1268  reinterpret_cast<char *>(Dest) + Offset,
1269  ActualDepEvents, OutEvent, OutEventImpl);
1270 }
1271 
1273  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1274  size_t NumBytes, size_t Offset, void *Dest,
1275  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1276  sycl::detail::pi::PiEvent *OutEvent,
1277  const detail::EventImplPtr &OutEventImpl) {
1278  // Get or allocate USM memory for the device_global. Since we are reading from
1279  // it, we need it initialized if it has not been yet.
1280  DeviceGlobalUSMMem &DeviceGlobalUSM =
1281  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1282  void *Src = DeviceGlobalUSM.getPtr();
1283 
1284  // OwnedPiEvent will keep the initialization event alive for the duration
1285  // of this function call.
1286  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1287 
1288  // We may need addtional events, so create a non-const dependency events list
1289  // to use if we need to modify it.
1290  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1291  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1292  ZIEvent ? AuxDepEventsStorage : DepEvents;
1293 
1294  // If there is a zero-initializer event the memory operation should wait for
1295  // it.
1296  if (ZIEvent) {
1297  AuxDepEventsStorage = DepEvents;
1298  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1299  }
1300 
1301  MemoryManager::copy_usm(reinterpret_cast<const char *>(Src) + Offset, Queue,
1302  NumBytes, Dest, ActualDepEvents, OutEvent,
1303  OutEventImpl);
1304 }
1305 
1308  DeviceGlobalMapEntry *DeviceGlobalEntry) {
1309  assert(DeviceGlobalEntry->MIsDeviceImageScopeDecorated &&
1310  "device_global is not device image scope decorated.");
1311 
1312  // If the device global is used in multiple device images we cannot proceed.
1313  if (DeviceGlobalEntry->MImageIdentifiers.size() > 1)
1314  throw sycl::exception(make_error_code(errc::invalid),
1315  "More than one image exists with the device_global.");
1316 
1317  // If there are no kernels using the device_global we cannot proceed.
1318  if (DeviceGlobalEntry->MImageIdentifiers.size() == 0)
1319  throw sycl::exception(make_error_code(errc::invalid),
1320  "No image exists with the device_global.");
1321 
1322  // Look for cached programs with the device_global.
1323  device Device = Queue->get_device();
1324  ContextImplPtr ContextImpl = Queue->getContextImplPtr();
1325  std::optional<sycl::detail::pi::PiProgram> CachedProgram =
1326  ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry);
1327  if (CachedProgram)
1328  return *CachedProgram;
1329 
1330  // If there was no cached program, build one.
1331  auto Context = createSyclObjFromImpl<context>(ContextImpl);
1332  ProgramManager &PM = ProgramManager::getInstance();
1333  RTDeviceBinaryImage &Img =
1334  PM.getDeviceImage(DeviceGlobalEntry->MImages, Context, Device);
1335  device_image_plain DeviceImage =
1336  PM.getDeviceImageFromBinaryImage(&Img, Context, Device);
1337  device_image_plain BuiltImage = PM.build(DeviceImage, {Device}, {});
1338  return getSyclObjImpl(BuiltImage)->get_program_ref();
1339 }
1340 
1342  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1343  size_t NumBytes, size_t Offset, const void *Src,
1344  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1345  sycl::detail::pi::PiEvent *OutEvent) {
1346  sycl::detail::pi::PiProgram Program =
1347  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1348  const PluginPtr &Plugin = Queue->getPlugin();
1350  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1351  false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(),
1352  OutEvent);
1353 }
1354 
1356  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1357  size_t NumBytes, size_t Offset, void *Dest,
1358  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1359  sycl::detail::pi::PiEvent *OutEvent) {
1360  sycl::detail::pi::PiProgram Program =
1361  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1362  const PluginPtr &Plugin = Queue->getPlugin();
1364  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1365  false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(),
1366  OutEvent);
1367 }
1368 
1369 void MemoryManager::copy_to_device_global(
1370  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1371  size_t NumBytes, size_t Offset, const void *SrcMem,
1372  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1373  sycl::detail::pi::PiEvent *OutEvent,
1374  const detail::EventImplPtr &OutEventImpl) {
1375  DeviceGlobalMapEntry *DGEntry =
1376  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1377  DeviceGlobalPtr);
1378  assert(DGEntry &&
1379  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1380  "Invalid copy operation for device_global.");
1381  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1382  "Copy to device_global is out of bounds.");
1383 
1384  if (IsDeviceImageScoped)
1385  memcpyToDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, SrcMem,
1386  DepEvents, OutEvent);
1387  else
1388  memcpyToDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, SrcMem, DepEvents,
1389  OutEvent, OutEventImpl);
1390 }
1391 
1392 // TODO: This function will remain until ABI-breaking change
1393 void MemoryManager::copy_to_device_global(
1394  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1395  size_t NumBytes, size_t Offset, const void *SrcMem,
1396  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1397  sycl::detail::pi::PiEvent *OutEvent) {
1398  copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1399  Offset, SrcMem, DepEvents, OutEvent, nullptr);
1400 }
1401 
1402 void MemoryManager::copy_from_device_global(
1403  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1404  size_t NumBytes, size_t Offset, void *DstMem,
1405  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1406  sycl::detail::pi::PiEvent *OutEvent,
1407  const detail::EventImplPtr &OutEventImpl) {
1408  DeviceGlobalMapEntry *DGEntry =
1409  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1410  DeviceGlobalPtr);
1411  assert(DGEntry &&
1412  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1413  "Invalid copy operation for device_global.");
1414  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1415  "Copy from device_global is out of bounds.");
1416 
1417  if (IsDeviceImageScoped)
1418  memcpyFromDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, DstMem,
1419  DepEvents, OutEvent);
1420  else
1421  memcpyFromDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, DstMem,
1422  DepEvents, OutEvent, OutEventImpl);
1423 }
1424 
1425 // TODO: This function will remain until ABI-breaking change
1426 void MemoryManager::copy_from_device_global(
1427  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1428  size_t NumBytes, size_t Offset, void *DstMem,
1429  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1430  sycl::detail::pi::PiEvent *OutEvent) {
1431  copy_from_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1432  Offset, DstMem, DepEvents, OutEvent, nullptr);
1433 }
1434 
1435 // Command buffer methods
1436 void MemoryManager::ext_oneapi_copyD2D_cmd_buffer(
1438  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1439  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1440  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1441  unsigned int SrcElemSize, void *DstMem, unsigned int DimDst,
1442  sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1443  sycl::id<3> DstOffset, unsigned int DstElemSize,
1444  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1445  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1446  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1447  (void)DstAccessRange;
1448 
1449  const PluginPtr &Plugin = Context->getPlugin();
1450 
1451  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1452  TermPositions SrcPos, DstPos;
1453  prepTermPositions(SrcPos, DimSrc, MemType);
1454  prepTermPositions(DstPos, DimDst, MemType);
1455 
1456  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1457  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1458  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1459  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1460  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1461 
1462  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1463  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1464  "Images are not supported in Graphs");
1465  }
1466 
1467  if (1 == DimDst && 1 == DimSrc) {
1469  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1470  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), SrcXOffBytes,
1471  DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(),
1472  OutSyncPoint);
1473  } else {
1474  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
1475  // calculate both src and dest pitch using region[0], which is not correct
1476  // if src and dest are not the same size.
1477  size_t SrcRowPitch = SrcSzWidthBytes;
1478  size_t SrcSlicePitch = (DimSrc <= 1)
1479  ? SrcSzWidthBytes
1480  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
1481  size_t DstRowPitch = DstSzWidthBytes;
1482  size_t DstSlicePitch = (DimDst <= 1)
1483  ? DstSzWidthBytes
1484  : DstSzWidthBytes * DstSize[DstPos.YTerm];
1485 
1486  pi_buff_rect_offset_struct SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1487  SrcOffset[SrcPos.ZTerm]};
1488  pi_buff_rect_offset_struct DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm],
1489  DstOffset[DstPos.ZTerm]};
1490  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
1491  SrcAccessRange[SrcPos.YTerm],
1492  SrcAccessRange[SrcPos.ZTerm]};
1493 
1495  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1496  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), &SrcOrigin,
1497  &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch,
1498  DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint);
1499  }
1500 }
1501 
1502 void MemoryManager::ext_oneapi_copyD2H_cmd_buffer(
1504  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1505  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1506  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1507  unsigned int SrcElemSize, char *DstMem, unsigned int DimDst,
1508  sycl::range<3> DstSize, sycl::id<3> DstOffset, unsigned int DstElemSize,
1509  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1510  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1511  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1512 
1513  const PluginPtr &Plugin = Context->getPlugin();
1514 
1515  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1516  TermPositions SrcPos, DstPos;
1517  prepTermPositions(SrcPos, DimSrc, MemType);
1518  prepTermPositions(DstPos, DimDst, MemType);
1519 
1520  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1521  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1522  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1523  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1524  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1525 
1526  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1527  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1528  "Images are not supported in Graphs");
1529  }
1530 
1531  if (1 == DimDst && 1 == DimSrc) {
1532  pi_result Result =
1533  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferRead>(
1534  CommandBuffer,
1535  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1536  SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes,
1537  Deps.size(), Deps.data(), OutSyncPoint);
1538 
1539  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1540  throw sycl::exception(
1541  sycl::make_error_code(sycl::errc::feature_not_supported),
1542  "Device-to-host buffer copy command not supported by graph backend");
1543  } else {
1544  Plugin->checkPiResult(Result);
1545  }
1546  } else {
1547  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1548  size_t BufferSlicePitch =
1549  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1550  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1551  size_t HostSlicePitch =
1552  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1553 
1554  pi_buff_rect_offset_struct BufferOffset{
1555  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
1556  pi_buff_rect_offset_struct HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm],
1557  DstOffset[DstPos.ZTerm]};
1558  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
1559  SrcAccessRange[SrcPos.YTerm],
1560  SrcAccessRange[SrcPos.ZTerm]};
1561 
1562  pi_result Result =
1563  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferReadRect>(
1564  CommandBuffer,
1565  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1566  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1567  BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(),
1568  Deps.data(), OutSyncPoint);
1569  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1570  throw sycl::exception(
1571  sycl::make_error_code(sycl::errc::feature_not_supported),
1572  "Device-to-host buffer copy command not supported by graph backend");
1573  } else {
1574  Plugin->checkPiResult(Result);
1575  }
1576  }
1577 }
1578 
1579 void MemoryManager::ext_oneapi_copyH2D_cmd_buffer(
1581  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1582  char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1583  sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem,
1584  unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1585  sycl::id<3> DstOffset, unsigned int DstElemSize,
1586  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1587  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1588  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1589 
1590  const PluginPtr &Plugin = Context->getPlugin();
1591 
1592  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1593  TermPositions SrcPos, DstPos;
1594  prepTermPositions(SrcPos, DimSrc, MemType);
1595  prepTermPositions(DstPos, DimDst, MemType);
1596 
1597  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1598  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1599  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
1600  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1601  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1602 
1603  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1604  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1605  "Images are not supported in Graphs");
1606  }
1607 
1608  if (1 == DimDst && 1 == DimSrc) {
1609  pi_result Result =
1610  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferWrite>(
1611  CommandBuffer,
1612  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1613  DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes,
1614  Deps.size(), Deps.data(), OutSyncPoint);
1615 
1616  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1617  throw sycl::exception(
1618  sycl::make_error_code(sycl::errc::feature_not_supported),
1619  "Host-to-device buffer copy command not supported by graph backend");
1620  } else {
1621  Plugin->checkPiResult(Result);
1622  }
1623  } else {
1624  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1625  size_t BufferSlicePitch =
1626  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1627  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1628  size_t HostSlicePitch =
1629  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1630 
1631  pi_buff_rect_offset_struct BufferOffset{
1632  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
1633  pi_buff_rect_offset_struct HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1634  SrcOffset[SrcPos.ZTerm]};
1635  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
1636  DstAccessRange[DstPos.YTerm],
1637  DstAccessRange[DstPos.ZTerm]};
1638 
1639  pi_result Result =
1641  CommandBuffer,
1642  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1643  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1644  BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(),
1645  Deps.data(), OutSyncPoint);
1646 
1647  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1648  throw sycl::exception(
1649  sycl::make_error_code(sycl::errc::feature_not_supported),
1650  "Host-to-device buffer copy command not supported by graph backend");
1651  } else {
1652  Plugin->checkPiResult(Result);
1653  }
1654  }
1655 }
1656 
1657 void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
1658  ContextImplPtr Context, const void *SrcMem,
1659  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, size_t Len,
1660  void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1661  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1662  if (!SrcMem || !DstMem)
1663  throw runtime_error("NULL pointer argument in memory copy operation.",
1664  PI_ERROR_INVALID_VALUE);
1665 
1666  const PluginPtr &Plugin = Context->getPlugin();
1667  pi_result Result =
1668  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemcpyUSM>(
1669  CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(),
1670  OutSyncPoint);
1671  if (Result == PI_ERROR_UNSUPPORTED_FEATURE) {
1672  throw sycl::exception(
1673  sycl::make_error_code(sycl::errc::feature_not_supported),
1674  "USM copy command not supported by graph backend");
1675  } else {
1676  Plugin->checkPiResult(Result);
1677  }
1678 }
1679 
1680 void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
1682  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem,
1683  size_t Len, int Pattern, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1684  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1685 
1686  if (!DstMem)
1687  throw runtime_error("NULL pointer argument in memory fill operation.",
1688  PI_ERROR_INVALID_VALUE);
1689 
1690  const PluginPtr &Plugin = Context->getPlugin();
1691  // Pattern is interpreted as an unsigned char so pattern size is always 1.
1692  size_t PatternSize = 1;
1694  CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(),
1695  Deps.data(), OutSyncPoint);
1696 }
1697 
1698 void MemoryManager::ext_oneapi_fill_cmd_buffer(
1700  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1701  void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim,
1702  sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
1703  unsigned int ElementSize,
1704  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1705  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1706  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1707 
1708  const PluginPtr &Plugin = Context->getPlugin();
1709  if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
1710  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1711  "Images are not supported in Graphs");
1712  }
1713 
1714  // 2D and 3D buffers accessors can't have custom range or the data will
1715  // likely be discontiguous.
1716  bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
1717  // For 2D and 3D buffers, the offset must be 0, or the data will be
1718  // discontiguous.
1719  bool OffsetUsable = (Dim <= 1) || (AccessOffset == sycl::id<3>{0, 0, 0});
1720  size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];
1721 
1722  if (RangesUsable && OffsetUsable) {
1724  CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
1725  PatternSize, AccessOffset[0] * ElementSize,
1726  RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
1727  return;
1728  }
1729  // The sycl::handler uses a parallel_for kernel in the case of unusable
1730  // Range or Offset, not CG:Fill. So we should not be here.
1731  throw runtime_error("Not supported configuration of fill requested",
1732  PI_ERROR_INVALID_OPERATION);
1733 }
1734 
1735 void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer(
1737  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem,
1738  size_t Length, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1739  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1740  assert(!Context->is_host() && "Host queue not supported in prefetch_usm.");
1741 
1742  const PluginPtr &Plugin = Context->getPlugin();
1744  CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(),
1745  Deps.data(), OutSyncPoint);
1746 }
1747 
1748 void MemoryManager::ext_oneapi_advise_usm_cmd_buffer(
1750  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const void *Mem,
1751  size_t Length, pi_mem_advice Advice,
1752  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1753  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1754  assert(!Context->is_host() && "Host queue not supported in advise_usm.");
1755 
1756  const PluginPtr &Plugin = Context->getPlugin();
1758  CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(),
1759  OutSyncPoint);
1760 }
1761 
1762 void MemoryManager::copy_image_bindless(
1763  void *Src, QueueImplPtr Queue, void *Dst,
1765  const sycl::detail::pi::PiMemImageFormat &Format,
1771  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1772  sycl::detail::pi::PiEvent *OutEvent) {
1773 
1774  assert(!Queue->getContextImplPtr()->is_host() &&
1775  "Host queue not supported in copy_image_bindless.");
1776  assert((Flags == (sycl::detail::pi::PiImageCopyFlags)
1780  "Invalid flags passed to copy_image_bindless.");
1781  if (!Dst || !Src)
1782  throw sycl::exception(
1783  sycl::make_error_code(errc::invalid),
1784  "NULL pointer argument in bindless image copy operation.");
1785 
1786  const detail::PluginPtr &Plugin = Queue->getPlugin();
1787  Plugin->call<PiApiKind::piextMemImageCopy>(
1788  Queue->getHandleRef(), Dst, Src, &Format, &Desc, Flags, &SrcOffset,
1789  &DstOffset, &CopyExtent, &HostExtent, DepEvents.size(), DepEvents.data(),
1790  OutEvent);
1791 }
1792 
1793 } // namespace detail
1794 } // namespace _V1
1795 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
static void * allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range< 3 > Range, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const sycl::property_list &PropsList)
static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList)
static void * allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList)
static void * allocateMemBuffer(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &InteropEvent)=0
static void bufferAssociateNotification(const void *, const void *)
static void bufferReleaseNotification(const void *, const void *)
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
size_t size() const
Definition: range.hpp:56
::pi_mem_flags PiMemFlags
Definition: pi.hpp:142
::pi_ext_sync_point PiExtSyncPoint
Definition: pi.hpp:156
static void memcpyToDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
static void memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent >, sycl::detail::pi::PiEvent &, const detail::EventImplPtr &)
void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static sycl::detail::pi::PiProgram getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:130
static sycl::detail::pi::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:3233
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:575
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Definition: accessor.hpp:3233
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
Definition: access.hpp:18
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Set pattern to the first count elements of type T starting from dev_ptr.
Definition: memory.hpp:185
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_cuda.cpp:905
pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_cuda.cpp:236
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
Definition: pi.h:761
uintptr_t pi_native_handle
Definition: pi.h:217
pi_bitfield pi_map_flags
Definition: pi.h:768
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:737
_pi_result
Definition: pi.h:224
pi_uint32 pi_bool
Definition: pi.h:215
pi_result piextMemImageCopy(pi_queue command_queue, void *dst_ptr, void *src_ptr, const pi_image_format *image_format, const pi_image_desc *image_desc, const pi_image_copy_flags flags, pi_image_offset src_offset, pi_image_offset dst_offset, pi_image_region copy_extent, pi_image_region host_extent, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to copy image data Host to Device or Device to Host.
Definition: pi_cuda.cpp:427
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_cuda.cpp:934
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
Definition: pi.h:765
_pi_mem_advice
Definition: pi.h:599
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:789
pi_bitfield pi_mem_properties
Definition: pi.h:774
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:674
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1108
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_cuda.cpp:227
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_cuda.cpp:924
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable.
Definition: pi_cuda.cpp:993
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:657
_pi_usm_migration_flags
Definition: pi.h:1975
_pi_image_copy_flags
Definition: pi.h:676
const pi_bool PI_FALSE
Definition: pi.h:685
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1139
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1130
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
Definition: pi_cuda.cpp:954
pi_result piextCommandBufferMemBufferFill(pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer fill command to the command-buffer.
Definition: pi_cuda.cpp:1174
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:700
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:726
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:712
@ PI_BUFFER_CREATE_TYPE_REGION
Definition: pi.h:682
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:773
uint32_t pi_uint32
Definition: pi.h:213
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:598
pi_bitfield pi_mem_flags
Definition: pi.h:758
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_cuda.cpp:762
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D fill API.
Definition: pi_cuda.cpp:941
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1118
pi_result piMemRelease(pi_mem mem)
Definition: pi_cuda.cpp:225
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:763
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:770
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
Definition: pi.h:771
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:815
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
Definition: pi_cuda.cpp:1099
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:769
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:750
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_cuda.cpp:836
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1152
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:760
pi_result piextCommandBufferFillUSM(pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM fill command to the command-buffer.
Definition: pi_cuda.cpp:1184
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:801
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:493
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
Definition: pi_cuda.cpp:210
pi_result piextCommandBufferAdviseUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t length, pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Advise command to the command-buffer.
Definition: pi_cuda.cpp:1204
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
Definition: pi_cuda.cpp:966
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
Definition: pi.h:776
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:824
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_cuda.cpp:913
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:686
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:124
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
Definition: pi_cuda.cpp:1002
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1161
pi_result piextCommandBufferPrefetchUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Prefetch command to the command-buffer.
Definition: pi_cuda.cpp:1195
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:615
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL
Definition: pi.h:775
DeviceGlobalUSMMem & getOrAllocateDeviceGlobalUSM(const std::shared_ptr< queue_impl > &QueueImpl)
std::unordered_set< RTDeviceBinaryImage * > MImages
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
sycl::detail::pi::PiEvent GetEvent()
Definition: pi_utils.hpp:50