DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_manager.cpp
Go to the documentation of this file.
1 //==-------------- memory_manager.cpp --------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include <detail/event_impl.hpp>
14 #include <detail/pi_utils.hpp>
15 #include <detail/queue_impl.hpp>
16 #include <detail/xpti_registry.hpp>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstring>
21 #include <vector>
22 
23 #ifdef XPTI_ENABLE_INSTRUMENTATION
24 #include <xpti/xpti_data_types.h>
25 #include <xpti/xpti_trace_framework.hpp>
26 #endif
27 
28 namespace sycl {
29 inline namespace _V1 {
30 namespace detail {
31 
32 #ifdef XPTI_ENABLE_INSTRUMENTATION
33 uint8_t GMemAllocStreamID;
34 xpti::trace_event_data_t *GMemAllocEvent;
35 #endif
36 
37 uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize,
38  size_t GuardZone) {
39  (void)ObjHandle;
40  (void)AllocSize;
41  (void)GuardZone;
42  uint64_t CorrelationID = 0;
43 #ifdef XPTI_ENABLE_INSTRUMENTATION
44  constexpr uint16_t NotificationTraceType =
45  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_begin);
46  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
47  xpti::mem_alloc_data_t MemAlloc{ObjHandle, 0 /* alloc ptr */, AllocSize,
48  GuardZone};
49 
50  CorrelationID = xptiGetUniqueId();
51  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
52  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
53  }
54 #endif
55  return CorrelationID;
56 }
57 
58 void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
59  size_t AllocSize, size_t GuardZone,
60  uint64_t CorrelationID) {
61  (void)ObjHandle;
62  (void)AllocPtr;
63  (void)AllocSize;
64  (void)GuardZone;
65  (void)CorrelationID;
66 #ifdef XPTI_ENABLE_INSTRUMENTATION
67  constexpr uint16_t NotificationTraceType =
68  static_cast<uint16_t>(xpti::trace_point_type_t::mem_alloc_end);
69  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
70  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, AllocSize, GuardZone};
71 
72  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
73  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
74  }
75 #endif
76 }
77 
78 uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr) {
79  (void)ObjHandle;
80  (void)AllocPtr;
81  uint64_t CorrelationID = 0;
82 #ifdef XPTI_ENABLE_INSTRUMENTATION
83  constexpr uint16_t NotificationTraceType =
84  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_begin);
85  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
86  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
87  0 /* guard zone */};
88 
89  CorrelationID = xptiGetUniqueId();
90  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
91  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
92  }
93 #endif
94  return CorrelationID;
95 }
96 
97 void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
98  uint64_t CorrelationID) {
99  (void)ObjHandle;
100  (void)AllocPtr;
101  (void)CorrelationID;
102 #ifdef XPTI_ENABLE_INSTRUMENTATION
103  constexpr uint16_t NotificationTraceType =
104  static_cast<uint16_t>(xpti::trace_point_type_t::mem_release_end);
105  if (xptiCheckTraceEnabled(GMemAllocStreamID, NotificationTraceType)) {
106  xpti::mem_alloc_data_t MemAlloc{ObjHandle, AllocPtr, 0 /* alloc size */,
107  0 /* guard zone */};
108 
109  xptiNotifySubscribers(GMemAllocStreamID, NotificationTraceType,
110  GMemAllocEvent, nullptr, CorrelationID, &MemAlloc);
111  }
112 #endif
113 }
114 
115 static void waitForEvents(const std::vector<EventImplPtr> &Events) {
116  // Assuming all events will be on the same device or
117  // devices associated with the same Backend.
118  if (!Events.empty()) {
119  const PluginPtr &Plugin = Events[0]->getPlugin();
120  std::vector<sycl::detail::pi::PiEvent> PiEvents(Events.size());
121  std::transform(Events.begin(), Events.end(), PiEvents.begin(),
122  [](const EventImplPtr &EventImpl) {
123  return EventImpl->getHandleRef();
124  });
125  Plugin->call<PiApiKind::piEventsWait>(PiEvents.size(), &PiEvents[0]);
126  }
127 }
128 
130  pi_mem_flags Flags, size_t Size, void *HostPtr,
131  pi_mem *RetMem, const pi_mem_properties *Props) {
132 #ifdef XPTI_ENABLE_INSTRUMENTATION
133  uint64_t CorrID = 0;
134 #endif
135  // We only want to instrument piMemBufferCreate
136  {
137 #ifdef XPTI_ENABLE_INSTRUMENTATION
138  CorrID =
139  emitMemAllocBeginTrace(0 /* mem object */, Size, 0 /* guard zone */);
140  xpti::utils::finally _{[&] {
141  // C-style cast is required for MSVC
142  uintptr_t MemObjID = (uintptr_t)(*RetMem);
143  pi_native_handle Ptr = 0;
144  // Always use call_nocheck here, because call may throw an exception,
145  // and this lambda will be called from destructor, which in combination
146  // rewards us with UB.
147  // When doing buffer interop we don't know what device the memory should
148  // be resident on, so pass nullptr for Device param. Buffer interop may
149  // not be supported by all backends.
150  Plugin->call_nocheck<PiApiKind::piextMemGetNativeHandle>(
151  *RetMem, /*Dev*/ nullptr, &Ptr);
152  emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */,
153  CorrID);
154  }};
155 #endif
156  if (Size)
157  Plugin->call<PiApiKind::piMemBufferCreate>(Ctx, Flags, Size, HostPtr,
158  RetMem, Props);
159  }
160 }
161 
162 void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) {
163  // FIXME piMemRelease does not guarante memory release. It is only true if
164  // reference counter is 1. However, SYCL runtime currently only calls
165  // piMemRetain only for OpenCL interop
166 #ifdef XPTI_ENABLE_INSTRUMENTATION
167  uint64_t CorrID = 0;
168  // C-style cast is required for MSVC
169  uintptr_t MemObjID = (uintptr_t)(Mem);
170  uintptr_t Ptr = 0;
171  // Do not make unnecessary PI calls without instrumentation enabled
172  if (xptiTraceEnabled()) {
173  pi_native_handle PtrHandle = 0;
174  // When doing buffer interop we don't know what device the memory should be
175  // resident on, so pass nullptr for Device param. Buffer interop may not be
176  // supported by all backends.
177  Plugin->call<PiApiKind::piextMemGetNativeHandle>(Mem, /*Dev*/ nullptr,
178  &PtrHandle);
179  Ptr = (uintptr_t)(PtrHandle);
180  }
181 #endif
182  // We only want to instrument piMemRelease
183  {
184 #ifdef XPTI_ENABLE_INSTRUMENTATION
185  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
186  xpti::utils::finally _{
187  [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }};
188 #endif
189  Plugin->call<PiApiKind::piMemRelease>(Mem);
190  }
191 }
192 
193 void memBufferMapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Buffer,
194  pi_bool Blocking, pi_map_flags Flags, size_t Offset,
195  size_t Size, pi_uint32 NumEvents,
196  const pi_event *WaitList, pi_event *Event,
197  void **RetMap) {
198 #ifdef XPTI_ENABLE_INSTRUMENTATION
199  uint64_t CorrID = 0;
200  uintptr_t MemObjID = (uintptr_t)(Buffer);
201 #endif
202  // We only want to instrument piEnqueueMemBufferMap
203 
204 #ifdef XPTI_ENABLE_INSTRUMENTATION
205  CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */);
206  xpti::utils::finally _{[&] {
207  emitMemAllocEndTrace(MemObjID, (uintptr_t)(*RetMap), Size,
208  0 /* guard zone */, CorrID);
209  }};
210 #endif
211  Plugin->call<PiApiKind::piEnqueueMemBufferMap>(Queue, Buffer, Blocking, Flags,
212  Offset, Size, NumEvents,
213  WaitList, Event, RetMap);
214 }
215 
216 void memUnmapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Mem,
217  void *MappedPtr, pi_uint32 NumEvents,
218  const pi_event *WaitList, pi_event *Event) {
219 #ifdef XPTI_ENABLE_INSTRUMENTATION
220  uint64_t CorrID = 0;
221  uintptr_t MemObjID = (uintptr_t)(Mem);
222  uintptr_t Ptr = (uintptr_t)(MappedPtr);
223 #endif
224  // We only want to instrument piEnqueueMemUnmap
225  {
226 #ifdef XPTI_ENABLE_INSTRUMENTATION
227  CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr);
228  xpti::utils::finally _{[&] {
229  // There's no way for SYCL to know, when the pointer is freed, so we have
230  // to explicitly wait for the end of data transfers here in order to
231  // report correct events.
232  // Always use call_nocheck here, because call may throw an exception,
233  // and this lambda will be called from destructor, which in combination
234  // rewards us with UB.
235  Plugin->call_nocheck<PiApiKind::piEventsWait>(1, Event);
236  emitMemReleaseEndTrace(MemObjID, Ptr, CorrID);
237  }};
238 #endif
239  Plugin->call<PiApiKind::piEnqueueMemUnmap>(Queue, Mem, MappedPtr, NumEvents,
240  WaitList, Event);
241  }
242 }
243 
245  void *MemAllocation,
246  std::vector<EventImplPtr> DepEvents,
247  sycl::detail::pi::PiEvent &OutEvent) {
248  // There is no async API for memory releasing. Explicitly wait for all
249  // dependency events and return empty event.
250  waitForEvents(DepEvents);
251  OutEvent = nullptr;
252  XPTIRegistry::bufferReleaseNotification(MemObj, MemAllocation);
253  MemObj->releaseMem(TargetContext, MemAllocation);
254 }
255 
257  SYCLMemObjI *MemObj, void *MemAllocation,
258  void *UserPtr) {
259  if (UserPtr == MemAllocation) {
260  // Do nothing as it's user provided memory.
261  return;
262  }
263 
264  if (TargetContext->is_host()) {
265  MemObj->releaseHostMem(MemAllocation);
266  return;
267  }
268 
269  const PluginPtr &Plugin = TargetContext->getPlugin();
270  memReleaseHelper(Plugin, pi::cast<sycl::detail::pi::PiMem>(MemAllocation));
271 }
272 
274  bool InitFromUserData, void *HostPtr,
275  std::vector<EventImplPtr> DepEvents,
276  sycl::detail::pi::PiEvent &OutEvent) {
277  // There is no async API for memory allocation. Explicitly wait for all
278  // dependency events and return empty event.
279  waitForEvents(DepEvents);
280  OutEvent = nullptr;
281 
282  return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
283  OutEvent);
284 }
285 
286 void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
287  bool HostPtrReadOnly, size_t Size,
288  const sycl::property_list &) {
289  std::ignore = HostPtrReadOnly;
290  std::ignore = Size;
291 
292  // Can return user pointer directly if it is not a nullptr.
293  if (UserPtr)
294  return UserPtr;
295 
296  return MemObj->allocateHostMem();
297  ;
298 }
299 
301  ContextImplPtr TargetContext, void *UserPtr,
302  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
303  const sycl::property_list &, sycl::detail::pi::PiEvent &OutEventToWait) {
304  (void)TargetContext;
305  (void)InteropContext;
306  // If memory object is created with interop c'tor return cl_mem as is.
307  assert(TargetContext == InteropContext && "Expected matching contexts");
308  OutEventToWait = InteropEvent->getHandleRef();
309  // Retain the event since it will be released during alloca command
310  // destruction
311  if (nullptr != OutEventToWait) {
312  const PluginPtr &Plugin = InteropEvent->getPlugin();
313  Plugin->call<PiApiKind::piEventRetain>(OutEventToWait);
314  }
315  return UserPtr;
316 }
317 
319 getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly) {
320  // Create read_write mem object to handle arbitrary uses.
323  if (UserPtr)
324  Result |= PI_MEM_FLAGS_HOST_PTR_USE;
325  return Result;
326 }
327 
329  ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly,
332  const sycl::property_list &) {
333  sycl::detail::pi::PiMemFlags CreationFlags =
334  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
335 
337  const PluginPtr &Plugin = TargetContext->getPlugin();
338  Plugin->call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(),
339  CreationFlags, &Format, &Desc,
340  UserPtr, &NewMem);
341  return NewMem;
342 }
343 
344 void *
346  bool HostPtrReadOnly, const size_t Size,
347  const sycl::property_list &PropsList) {
348  sycl::detail::pi::PiMemFlags CreationFlags =
349  getMemObjCreationFlags(UserPtr, HostPtrReadOnly);
350  if (PropsList.has_property<
352  CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC;
353 
354  sycl::detail::pi::PiMem NewMem = nullptr;
355  const PluginPtr &Plugin = TargetContext->getPlugin();
356 
357  std::vector<pi_mem_properties> AllocProps;
358 
360  TargetContext->isBufferLocationSupported()) {
361  auto Location =
363  .get_buffer_location();
364  AllocProps.reserve(AllocProps.size() + 2);
365  AllocProps.push_back(PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION);
366  AllocProps.push_back(Location);
367  }
368 
369  if (PropsList.has_property<property::buffer::mem_channel>()) {
370  auto Channel =
371  PropsList.get_property<property::buffer::mem_channel>().get_channel();
372  AllocProps.reserve(AllocProps.size() + 2);
373  AllocProps.push_back(PI_MEM_PROPERTIES_CHANNEL);
374  AllocProps.push_back(Channel);
375  }
376 
377  pi_mem_properties *AllocPropsPtr = nullptr;
378  if (!AllocProps.empty()) {
379  // If there are allocation properties, push an end to the list and update
380  // the properties pointer.
381  AllocProps.push_back(0);
382  AllocPropsPtr = AllocProps.data();
383  }
384 
385  memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags,
386  Size, UserPtr, &NewMem, AllocPropsPtr);
387  return NewMem;
388 }
389 
391  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
392  bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent,
393  const ContextImplPtr &InteropContext, const sycl::property_list &PropsList,
394  sycl::detail::pi::PiEvent &OutEventToWait) {
395  void *MemPtr;
396  if (TargetContext->is_host())
397  MemPtr =
398  allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList);
399  else if (UserPtr && InteropContext)
400  MemPtr =
401  allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
402  InteropContext, PropsList, OutEventToWait);
403  else
404  MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size,
405  PropsList);
407  return MemPtr;
408 }
409 
411  ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr,
412  bool HostPtrReadOnly, size_t Size,
415  const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext,
416  const sycl::property_list &PropsList,
417  sycl::detail::pi::PiEvent &OutEventToWait) {
418  if (TargetContext->is_host())
419  return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size,
420  PropsList);
421  if (UserPtr && InteropContext)
422  return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent,
423  InteropContext, PropsList, OutEventToWait);
424  return allocateImageObject(TargetContext, UserPtr, HostPtrReadOnly, Desc,
425  Format, PropsList);
426 }
427 
429  void *ParentMemObj, size_t ElemSize,
430  size_t Offset, range<3> Range,
431  std::vector<EventImplPtr> DepEvents,
432  sycl::detail::pi::PiEvent &OutEvent) {
433  waitForEvents(DepEvents);
434  OutEvent = nullptr;
435 
436  if (TargetContext->is_host())
437  return static_cast<void *>(static_cast<char *>(ParentMemObj) + Offset);
438 
439  size_t SizeInBytes = ElemSize;
440  for (size_t I = 0; I < 3; ++I)
441  SizeInBytes *= Range[I];
442 
443  sycl::detail::pi::PiResult Error = PI_SUCCESS;
444  pi_buffer_region_struct Region{Offset, SizeInBytes};
446  const PluginPtr &Plugin = TargetContext->getPlugin();
447  Error = Plugin->call_nocheck<PiApiKind::piMemBufferPartition>(
448  pi::cast<sycl::detail::pi::PiMem>(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW,
449  PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem);
450  if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET)
451  throw invalid_object_error(
452  "Specified offset of the sub-buffer being constructed is not a "
453  "multiple of the memory base address alignment",
454  PI_ERROR_INVALID_VALUE);
455 
456  if (Error != PI_SUCCESS) {
457  Plugin->reportPiError(Error, "allocateMemSubBuffer()");
458  }
459 
460  return NewMem;
461 }
462 
464  int XTerm;
465  int YTerm;
466  int ZTerm;
467 };
470  // For buffers, the offsets/ranges coming from accessor are always
471  // id<3>/range<3> But their organization varies by dimension:
472  // 1 ==> {width, 1, 1}
473  // 2 ==> {height, width, 1}
474  // 3 ==> {depth, height, width}
475  // Some callers schedule 0 as DimDst/DimSrc.
476 
477  if (Type == detail::SYCLMemObjI::MemObjType::Buffer) {
478  if (Dimensions == 3) {
479  pos.XTerm = 2, pos.YTerm = 1, pos.ZTerm = 0;
480  } else if (Dimensions == 2) {
481  pos.XTerm = 1, pos.YTerm = 0, pos.ZTerm = 2;
482  } else { // Dimension is 1 or 0
483  pos.XTerm = 0, pos.YTerm = 1, pos.ZTerm = 2;
484  }
485  } else { // While range<>/id<> use by images is different than buffers, it's
486  // consistent with their accessors.
487  pos.XTerm = 0;
488  pos.YTerm = 1;
489  pos.ZTerm = 2;
490  }
491 }
492 
493 void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
494  unsigned int DimSrc, sycl::range<3> SrcSize,
495  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
496  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
497  QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize,
498  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
499  unsigned int DstElemSize,
500  std::vector<sycl::detail::pi::PiEvent> DepEvents,
501  sycl::detail::pi::PiEvent &OutEvent,
502  const detail::EventImplPtr &OutEventImpl) {
503  (void)SrcAccessRange;
504  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
505 
506  const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef();
507  const PluginPtr &Plugin = TgtQueue->getPlugin();
508 
509  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
510  TermPositions SrcPos, DstPos;
511  prepTermPositions(SrcPos, DimSrc, MemType);
512  prepTermPositions(DstPos, DimDst, MemType);
513 
514  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
515  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
516  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
517  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
518  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
519 
520  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
521  if (1 == DimDst && 1 == DimSrc) {
522  if (OutEventImpl != nullptr)
523  OutEventImpl->setHostEnqueueTime();
525  Queue, DstMem,
526  /*blocking_write=*/PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes,
527  SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
528  } else {
529  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
530  size_t BufferSlicePitch =
531  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
532  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
533  size_t HostSlicePitch =
534  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
535 
536  pi_buff_rect_offset_struct BufferOffset{
537  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
538  pi_buff_rect_offset_struct HostOffset{
539  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
540  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
541  DstAccessRange[DstPos.YTerm],
542  DstAccessRange[DstPos.ZTerm]};
543  if (OutEventImpl != nullptr)
544  OutEventImpl->setHostEnqueueTime();
546  Queue, DstMem,
547  /*blocking_write=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
548  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
549  SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
550  }
551  } else {
552  size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
553  size_t InputSlicePitch =
554  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
555 
556  pi_image_offset_struct Origin{DstOffset[DstPos.XTerm],
557  DstOffset[DstPos.YTerm],
558  DstOffset[DstPos.ZTerm]};
559  pi_image_region_struct Region{DstAccessRange[DstPos.XTerm],
560  DstAccessRange[DstPos.YTerm],
561  DstAccessRange[DstPos.ZTerm]};
562  if (OutEventImpl != nullptr)
563  OutEventImpl->setHostEnqueueTime();
564  Plugin->call<PiApiKind::piEnqueueMemImageWrite>(
565  Queue, DstMem,
566  /*blocking_write=*/PI_FALSE, &Origin, &Region, InputRowPitch,
567  InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent);
568  }
569 }
570 
571 void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
572  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
573  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
574  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
575  unsigned int DimDst, sycl::range<3> DstSize,
576  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
577  unsigned int DstElemSize,
578  std::vector<sycl::detail::pi::PiEvent> DepEvents,
579  sycl::detail::pi::PiEvent &OutEvent,
580  const detail::EventImplPtr &OutEventImpl) {
581  (void)DstAccessRange;
582  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
583 
584  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
585  const PluginPtr &Plugin = SrcQueue->getPlugin();
586 
587  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
588  TermPositions SrcPos, DstPos;
589  prepTermPositions(SrcPos, DimSrc, MemType);
590  prepTermPositions(DstPos, DimDst, MemType);
591 
592  // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
593  // copyD2D) will usually have the same values for AccessRange, Size,
594  // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
595  // Essentially, it schedules a copyBack of chars thus in copyD2H the
596  // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
597  // sized to bytes with a DstElemSize of 1.
598  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
599  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
600  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
601  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
602  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
603 
604  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
605  if (1 == DimDst && 1 == DimSrc) {
606  if (OutEventImpl != nullptr)
607  OutEventImpl->setHostEnqueueTime();
608  Plugin->call<PiApiKind::piEnqueueMemBufferRead>(
609  Queue, SrcMem,
610  /*blocking_read=*/PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes,
611  DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent);
612  } else {
613  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
614  size_t BufferSlicePitch =
615  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
616  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
617  size_t HostSlicePitch =
618  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
619 
620  pi_buff_rect_offset_struct BufferOffset{
621  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
622  pi_buff_rect_offset_struct HostOffset{
623  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
624  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
625  SrcAccessRange[SrcPos.YTerm],
626  SrcAccessRange[SrcPos.ZTerm]};
627  if (OutEventImpl != nullptr)
628  OutEventImpl->setHostEnqueueTime();
630  Queue, SrcMem,
631  /*blocking_read=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion,
632  BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch,
633  DstMem, DepEvents.size(), DepEvents.data(), &OutEvent);
634  }
635  } else {
636  size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
637  size_t SlicePitch =
638  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
639 
640  pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm],
641  SrcOffset[SrcPos.YTerm],
642  SrcOffset[SrcPos.ZTerm]};
643  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
644  SrcAccessRange[SrcPos.YTerm],
645  SrcAccessRange[SrcPos.ZTerm]};
646  if (OutEventImpl != nullptr)
647  OutEventImpl->setHostEnqueueTime();
648  Plugin->call<PiApiKind::piEnqueueMemImageRead>(
649  Queue, SrcMem, PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
650  DepEvents.size(), DepEvents.data(), &OutEvent);
651  }
652 }
653 
654 void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem,
655  QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize,
656  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
657  unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem,
658  QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize,
659  sycl::range<3>, sycl::id<3> DstOffset, unsigned int DstElemSize,
660  std::vector<sycl::detail::pi::PiEvent> DepEvents,
661  sycl::detail::pi::PiEvent &OutEvent,
662  const detail::EventImplPtr &OutEventImpl) {
663  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
664 
665  const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef();
666  const PluginPtr &Plugin = SrcQueue->getPlugin();
667 
668  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
669  TermPositions SrcPos, DstPos;
670  prepTermPositions(SrcPos, DimSrc, MemType);
671  prepTermPositions(DstPos, DimDst, MemType);
672 
673  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
674  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
675  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
676  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
677  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
678 
679  if (MemType == detail::SYCLMemObjI::MemObjType::Buffer) {
680  if (1 == DimDst && 1 == DimSrc) {
681  if (OutEventImpl != nullptr)
682  OutEventImpl->setHostEnqueueTime();
683  Plugin->call<PiApiKind::piEnqueueMemBufferCopy>(
684  Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
685  SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(),
686  &OutEvent);
687  } else {
688  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
689  // calculate both src and dest pitch using region[0], which is not correct
690  // if src and dest are not the same size.
691  size_t SrcRowPitch = SrcSzWidthBytes;
692  size_t SrcSlicePitch = (DimSrc <= 1)
693  ? SrcSzWidthBytes
694  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
695  size_t DstRowPitch = DstSzWidthBytes;
696  size_t DstSlicePitch = (DimDst <= 1)
697  ? DstSzWidthBytes
698  : DstSzWidthBytes * DstSize[DstPos.YTerm];
699 
700  pi_buff_rect_offset_struct SrcOrigin{
701  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
702  pi_buff_rect_offset_struct DstOrigin{
703  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
704  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
705  SrcAccessRange[SrcPos.YTerm],
706  SrcAccessRange[SrcPos.ZTerm]};
707  if (OutEventImpl != nullptr)
708  OutEventImpl->setHostEnqueueTime();
710  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
711  SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(),
712  DepEvents.data(), &OutEvent);
713  }
714  } else {
715  pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm],
716  SrcOffset[SrcPos.YTerm],
717  SrcOffset[SrcPos.ZTerm]};
718  pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm],
719  DstOffset[DstPos.YTerm],
720  DstOffset[DstPos.ZTerm]};
721  pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm],
722  SrcAccessRange[SrcPos.YTerm],
723  SrcAccessRange[SrcPos.ZTerm]};
724  if (OutEventImpl != nullptr)
725  OutEventImpl->setHostEnqueueTime();
726  Plugin->call<PiApiKind::piEnqueueMemImageCopy>(
727  Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
728  DepEvents.size(), DepEvents.data(), &OutEvent);
729  }
730 }
731 
732 static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr,
733  unsigned int DimSrc, sycl::range<3> SrcSize,
734  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
735  unsigned int SrcElemSize, char *DstMem, QueueImplPtr,
736  unsigned int DimDst, sycl::range<3> DstSize,
737  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
738  unsigned int DstElemSize,
739  std::vector<sycl::detail::pi::PiEvent>,
741  if ((DimSrc != 1 || DimDst != 1) &&
742  (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} ||
743  SrcSize != SrcAccessRange || DstSize != DstAccessRange)) {
744  throw runtime_error("Not supported configuration of memcpy requested",
745  PI_ERROR_INVALID_OPERATION);
746  }
747 
748  SrcMem += SrcOffset[0] * SrcElemSize;
749  DstMem += DstOffset[0] * DstElemSize;
750 
751  if (SrcMem == DstMem)
752  return;
753 
754  size_t BytesToCopy =
755  SrcAccessRange[0] * SrcElemSize * SrcAccessRange[1] * SrcAccessRange[2];
756  std::memcpy(DstMem, SrcMem, BytesToCopy);
757 }
758 
759 // Copies memory between: host and device, host and host,
760 // device and device if memory objects bound to the one context.
761 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
762  QueueImplPtr SrcQueue, unsigned int DimSrc,
763  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
764  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
765  void *DstMem, QueueImplPtr TgtQueue,
766  unsigned int DimDst, sycl::range<3> DstSize,
767  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
768  unsigned int DstElemSize,
769  std::vector<sycl::detail::pi::PiEvent> DepEvents,
770  sycl::detail::pi::PiEvent &OutEvent,
771  const detail::EventImplPtr &OutEventImpl) {
772 
773  if (SrcQueue->is_host()) {
774  if (TgtQueue->is_host())
775  copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
776  SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem,
777  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
778  DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
779  else
780  copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize,
781  SrcAccessRange, SrcOffset, SrcElemSize,
782  pi::cast<sycl::detail::pi::PiMem>(DstMem), std::move(TgtQueue),
783  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
784  std::move(DepEvents), OutEvent, OutEventImpl);
785  } else {
786  if (TgtQueue->is_host())
787  copyD2H(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
788  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
789  SrcElemSize, (char *)DstMem, std::move(TgtQueue), DimDst, DstSize,
790  DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents),
791  OutEvent, OutEventImpl);
792  else
793  copyD2D(SYCLMemObj, pi::cast<sycl::detail::pi::PiMem>(SrcMem),
794  std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset,
795  SrcElemSize, pi::cast<sycl::detail::pi::PiMem>(DstMem),
796  std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset,
797  DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl);
798  }
799 }
800 
801 // TODO: This function will remain until ABI-breaking change
802 void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem,
803  QueueImplPtr SrcQueue, unsigned int DimSrc,
804  sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange,
805  sycl::id<3> SrcOffset, unsigned int SrcElemSize,
806  void *DstMem, QueueImplPtr TgtQueue,
807  unsigned int DimDst, sycl::range<3> DstSize,
808  sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
809  unsigned int DstElemSize,
810  std::vector<sycl::detail::pi::PiEvent> DepEvents,
811  sycl::detail::pi::PiEvent &OutEvent) {
812  MemoryManager::copy(SYCLMemObj, SrcMem, SrcQueue, DimSrc, SrcSize,
813  SrcAccessRange, SrcOffset, SrcElemSize, DstMem, TgtQueue,
814  DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize,
815  DepEvents, OutEvent, nullptr);
816 }
817 
818 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
819  size_t PatternSize, const char *Pattern,
820  unsigned int Dim, sycl::range<3> MemRange,
821  sycl::range<3> AccRange, sycl::id<3> Offset,
822  unsigned int ElementSize,
823  std::vector<sycl::detail::pi::PiEvent> DepEvents,
824  sycl::detail::pi::PiEvent &OutEvent,
825  const detail::EventImplPtr &OutEventImpl) {
826  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
827 
828  const PluginPtr &Plugin = Queue->getPlugin();
829 
830  if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) {
831  if (OutEventImpl != nullptr)
832  OutEventImpl->setHostEnqueueTime();
833 
834  // 2D and 3D buffers accessors can't have custom range or the data will
835  // likely be discontiguous.
836  bool RangesUsable = (Dim <= 1) || (MemRange == AccRange);
837  // For 2D and 3D buffers, the offset must be 0, or the data will be
838  // discontiguous.
839  bool OffsetUsable = (Dim <= 1) || (Offset == sycl::id<3>{0, 0, 0});
840  size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2];
841 
842  if (RangesUsable && OffsetUsable) {
843  Plugin->call<PiApiKind::piEnqueueMemBufferFill>(
844  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem),
845  Pattern, PatternSize, Offset[0] * ElementSize,
846  RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(),
847  &OutEvent);
848  return;
849  }
850  // The sycl::handler uses a parallel_for kernel in the case of unusable
851  // Range or Offset, not CG:Fill. So we should not be here.
852  throw runtime_error("Not supported configuration of fill requested",
853  PI_ERROR_INVALID_OPERATION);
854  } else {
855  if (OutEventImpl != nullptr)
856  OutEventImpl->setHostEnqueueTime();
857  // images don't support offset accessors and thus avoid issues of
858  // discontinguous data
859  Plugin->call<PiApiKind::piEnqueueMemImageFill>(
860  Queue->getHandleRef(), pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
861  &Offset[0], &AccRange[0], DepEvents.size(), DepEvents.data(),
862  &OutEvent);
863  }
864 }
865 
866 // TODO: This function will remain until ABI-breaking change
867 void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
868  size_t PatternSize, const char *Pattern,
869  unsigned int Dim, sycl::range<3> Size,
870  sycl::range<3> Range, sycl::id<3> Offset,
871  unsigned int ElementSize,
872  std::vector<sycl::detail::pi::PiEvent> DepEvents,
873  sycl::detail::pi::PiEvent &OutEvent) {
874  MemoryManager::fill(SYCLMemObj, Mem, Queue, PatternSize, Pattern, Dim, Size,
875  Range, Offset, ElementSize, DepEvents, OutEvent, nullptr);
876 }
877 
878 void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
879  access::mode AccessMode, unsigned int, sycl::range<3>,
880  sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
881  unsigned int ElementSize,
882  std::vector<sycl::detail::pi::PiEvent> DepEvents,
883  sycl::detail::pi::PiEvent &OutEvent) {
884  if (Queue->is_host()) {
885  throw runtime_error("Not supported configuration of map requested",
886  PI_ERROR_INVALID_OPERATION);
887  }
888 
889  pi_map_flags Flags = 0;
890 
891  switch (AccessMode) {
892  case access::mode::read:
893  Flags |= PI_MAP_READ;
894  break;
895  case access::mode::write:
896  Flags |= PI_MAP_WRITE;
897  break;
899  case access::mode::atomic:
900  Flags = PI_MAP_WRITE | PI_MAP_READ;
901  break;
902  case access::mode::discard_write:
903  case access::mode::discard_read_write:
905  break;
906  }
907 
908  AccessOffset[0] *= ElementSize;
909  AccessRange[0] *= ElementSize;
910 
911  // TODO: Handle offset
912  assert(AccessOffset[0] == 0 && "Handle offset");
913 
914  void *MappedPtr = nullptr;
915  const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2];
916  const PluginPtr &Plugin = Queue->getPlugin();
917  memBufferMapHelper(Plugin, Queue->getHandleRef(),
918  pi::cast<sycl::detail::pi::PiMem>(Mem), PI_FALSE, Flags,
919  AccessOffset[0], BytesToMap, DepEvents.size(),
920  DepEvents.data(), &OutEvent, &MappedPtr);
921  return MappedPtr;
922 }
923 
924 void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue,
925  void *MappedPtr,
926  std::vector<sycl::detail::pi::PiEvent> DepEvents,
927  sycl::detail::pi::PiEvent &OutEvent) {
928 
929  // Host queue is not supported here.
930  // All DepEvents are to the same Context.
931  // Using the plugin of the Queue.
932 
933  const PluginPtr &Plugin = Queue->getPlugin();
934  memUnmapHelper(Plugin, Queue->getHandleRef(),
935  pi::cast<sycl::detail::pi::PiMem>(Mem), MappedPtr,
936  DepEvents.size(), DepEvents.data(), &OutEvent);
937 }
938 
939 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
940  size_t Len, void *DstMem,
941  std::vector<sycl::detail::pi::PiEvent> DepEvents,
942  sycl::detail::pi::PiEvent *OutEvent,
943  const detail::EventImplPtr &OutEventImpl) {
944  assert(!SrcQueue->getContextImplPtr()->is_host() &&
945  "Host queue not supported in fill_usm.");
946 
947  if (!Len) { // no-op, but ensure DepEvents will still be waited on
948  if (!DepEvents.empty()) {
949  if (OutEventImpl != nullptr)
950  OutEventImpl->setHostEnqueueTime();
951  SrcQueue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
952  SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(),
953  OutEvent);
954  }
955  return;
956  }
957 
958  if (!SrcMem || !DstMem)
959  throw runtime_error("NULL pointer argument in memory copy operation.",
960  PI_ERROR_INVALID_VALUE);
961 
962  const PluginPtr &Plugin = SrcQueue->getPlugin();
963  if (OutEventImpl != nullptr)
964  OutEventImpl->setHostEnqueueTime();
965  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
966  SrcQueue->getHandleRef(),
967  /* blocking */ PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(),
968  DepEvents.data(), OutEvent);
969 }
970 
971 // TODO: This function will remain until ABI-breaking change
972 void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue,
973  size_t Len, void *DstMem,
974  std::vector<sycl::detail::pi::PiEvent> DepEvents,
975  sycl::detail::pi::PiEvent *OutEvent) {
976  MemoryManager::copy_usm(SrcMem, SrcQueue, Len, DstMem, DepEvents, OutEvent,
977  nullptr);
978 }
979 
980 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
981  int Pattern,
982  std::vector<sycl::detail::pi::PiEvent> DepEvents,
983  sycl::detail::pi::PiEvent *OutEvent,
984  const detail::EventImplPtr &OutEventImpl) {
985  assert(!Queue->getContextImplPtr()->is_host() &&
986  "Host queue not supported in fill_usm.");
987 
988  if (!Length) { // no-op, but ensure DepEvents will still be waited on
989  if (!DepEvents.empty()) {
990  if (OutEventImpl != nullptr)
991  OutEventImpl->setHostEnqueueTime();
992  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
993  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
994  }
995  return;
996  }
997 
998  if (!Mem)
999  throw runtime_error("NULL pointer argument in memory fill operation.",
1000  PI_ERROR_INVALID_VALUE);
1001  if (OutEventImpl != nullptr)
1002  OutEventImpl->setHostEnqueueTime();
1003  const PluginPtr &Plugin = Queue->getPlugin();
1004  Plugin->call<PiApiKind::piextUSMEnqueueMemset>(
1005  Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(),
1006  DepEvents.data(), OutEvent);
1007 }
1008 
1009 // TODO: This function will remain until ABI-breaking change
1010 void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
1011  int Pattern,
1012  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1013  sycl::detail::pi::PiEvent *OutEvent) {
1014  MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent,
1015  nullptr); // OutEventImpl);
1016 }
1017 
1018 void MemoryManager::prefetch_usm(
1019  void *Mem, QueueImplPtr Queue, size_t Length,
1020  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1021  sycl::detail::pi::PiEvent *OutEvent,
1022  const detail::EventImplPtr &OutEventImpl) {
1023  assert(!Queue->getContextImplPtr()->is_host() &&
1024  "Host queue not supported in prefetch_usm.");
1025 
1026  const PluginPtr &Plugin = Queue->getPlugin();
1027  if (OutEventImpl != nullptr)
1028  OutEventImpl->setHostEnqueueTime();
1029  Plugin->call<PiApiKind::piextUSMEnqueuePrefetch>(
1030  Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0),
1031  DepEvents.size(), DepEvents.data(), OutEvent);
1032 }
1033 
1034 // TODO: This function will remain until ABI-breaking change
1035 void MemoryManager::prefetch_usm(
1036  void *Mem, QueueImplPtr Queue, size_t Length,
1037  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1038  sycl::detail::pi::PiEvent *OutEvent) {
1039  MemoryManager::prefetch_usm(Mem, Queue, Length, DepEvents, OutEvent, nullptr);
1040 }
1041 
1042 void MemoryManager::advise_usm(
1043  const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice,
1044  std::vector<sycl::detail::pi::PiEvent> /*DepEvents*/,
1045  sycl::detail::pi::PiEvent *OutEvent,
1046  const detail::EventImplPtr &OutEventImpl) {
1047  assert(!Queue->getContextImplPtr()->is_host() &&
1048  "Host queue not supported in advise_usm.");
1049 
1050  const PluginPtr &Plugin = Queue->getPlugin();
1051  if (OutEventImpl != nullptr)
1052  OutEventImpl->setHostEnqueueTime();
1053  Plugin->call<PiApiKind::piextUSMEnqueueMemAdvise>(Queue->getHandleRef(), Mem,
1054  Length, Advice, OutEvent);
1055 }
1056 
1057 // TODO: This function will remain until ABI-breaking change
1058 void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue,
1059  size_t Length, pi_mem_advice Advice,
1060  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1061  sycl::detail::pi::PiEvent *OutEvent) {
1062  MemoryManager::advise_usm(Mem, Queue, Length, Advice, DepEvents, OutEvent,
1063  nullptr);
1064 }
1065 
1066 void MemoryManager::copy_2d_usm(
1067  const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem,
1068  size_t DstPitch, size_t Width, size_t Height,
1069  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1070  sycl::detail::pi::PiEvent *OutEvent,
1071  const detail::EventImplPtr &OutEventImpl) {
1072  assert(!Queue->getContextImplPtr()->is_host() &&
1073  "Host queue not supported in copy_2d_usm.");
1074 
1075  if (Width == 0 || Height == 0) {
1076  // no-op, but ensure DepEvents will still be waited on
1077  if (!DepEvents.empty()) {
1078  if (OutEventImpl != nullptr)
1079  OutEventImpl->setHostEnqueueTime();
1080  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1081  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1082  }
1083  return;
1084  }
1085 
1086  if (!DstMem || !SrcMem)
1087  throw sycl::exception(sycl::make_error_code(errc::invalid),
1088  "NULL pointer argument in 2D memory copy operation.");
1089 
1090  const PluginPtr &Plugin = Queue->getPlugin();
1091 
1092  pi_bool SupportsUSMMemcpy2D = false;
1094  Queue->getContextImplPtr()->getHandleRef(),
1096  &SupportsUSMMemcpy2D, nullptr);
1097 
1098  if (SupportsUSMMemcpy2D) {
1099  if (OutEventImpl != nullptr)
1100  OutEventImpl->setHostEnqueueTime();
1101  // Direct memcpy2D is supported so we use this function.
1102  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy2D>(
1103  Queue->getHandleRef(), /*blocking=*/PI_FALSE, DstMem, DstPitch, SrcMem,
1104  SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1105  return;
1106  }
1107 
1108  // Otherwise we allow the special case where the copy is to or from host.
1109 #ifndef NDEBUG
1110  context Ctx = createSyclObjFromImpl<context>(Queue->getContextImplPtr());
1111  usm::alloc SrcAllocType = get_pointer_type(SrcMem, Ctx);
1112  usm::alloc DstAllocType = get_pointer_type(DstMem, Ctx);
1113  bool SrcIsHost =
1114  SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
1115  bool DstIsHost =
1116  DstAllocType == usm::alloc::unknown || DstAllocType == usm::alloc::host;
1117  assert((SrcIsHost || DstIsHost) && "In fallback path for copy_2d_usm either "
1118  "source or destination must be on host.");
1119 #endif // NDEBUG
1120 
1121  // The fallback in this case is to insert a copy per row.
1122  std::vector<OwnedPiEvent> CopyEventsManaged;
1123  CopyEventsManaged.reserve(Height);
1124  // We'll need continuous range of events for a wait later as well.
1125  std::vector<sycl::detail::pi::PiEvent> CopyEvents(Height);
1126  if (OutEventImpl != nullptr)
1127  OutEventImpl->setHostEnqueueTime();
1128  for (size_t I = 0; I < Height; ++I) {
1129  char *DstItBegin = static_cast<char *>(DstMem) + I * DstPitch;
1130  const char *SrcItBegin = static_cast<const char *>(SrcMem) + I * SrcPitch;
1131  Plugin->call<PiApiKind::piextUSMEnqueueMemcpy>(
1132  Queue->getHandleRef(), /* blocking */ PI_FALSE, DstItBegin, SrcItBegin,
1133  Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I);
1134  CopyEventsManaged.emplace_back(CopyEvents[I], Plugin,
1135  /*TakeOwnership=*/true);
1136  }
1137  if (OutEventImpl != nullptr)
1138  OutEventImpl->setHostEnqueueTime();
1139  // Then insert a wait to coalesce the copy events.
1140  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1141  Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent);
1142 }
1143 
1144 // TODO: This function will remain until ABI-breaking change
1145 void MemoryManager::copy_2d_usm(
1146  const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem,
1147  size_t DstPitch, size_t Width, size_t Height,
1148  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1149  sycl::detail::pi::PiEvent *OutEvent) {
1150  MemoryManager::copy_2d_usm(SrcMem, SrcPitch, Queue, DstMem, DstPitch, Width,
1151  Height, DepEvents, OutEvent, nullptr);
1152 }
1153 
1154 void MemoryManager::fill_2d_usm(
1155  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1156  const std::vector<char> &Pattern,
1157  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1158  sycl::detail::pi::PiEvent *OutEvent,
1159  const detail::EventImplPtr &OutEventImpl) {
1160  assert(!Queue->getContextImplPtr()->is_host() &&
1161  "Host queue not supported in fill_2d_usm.");
1162 
1163  if (Width == 0 || Height == 0) {
1164  // no-op, but ensure DepEvents will still be waited on
1165  if (!DepEvents.empty()) {
1166  if (OutEventImpl != nullptr)
1167  OutEventImpl->setHostEnqueueTime();
1168  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1169  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1170  }
1171  return;
1172  }
1173 
1174  if (!DstMem)
1175  throw sycl::exception(sycl::make_error_code(errc::invalid),
1176  "NULL pointer argument in 2D memory fill operation.");
1177  if (OutEventImpl != nullptr)
1178  OutEventImpl->setHostEnqueueTime();
1179  const PluginPtr &Plugin = Queue->getPlugin();
1180  Plugin->call<PiApiKind::piextUSMEnqueueFill2D>(
1181  Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(),
1182  Width, Height, DepEvents.size(), DepEvents.data(), OutEvent);
1183 }
1184 
1185 // TODO: This function will remain until ABI-breaking change
1186 void MemoryManager::fill_2d_usm(
1187  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1188  const std::vector<char> &Pattern,
1189  std::vector<sycl::detail::pi::PiEvent> DepEvents,
1190  sycl::detail::pi::PiEvent *OutEvent) {
1191  MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, Pattern,
1192  DepEvents, OutEvent, nullptr);
1193 }
1194 
1195 void MemoryManager::memset_2d_usm(
1196  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1197  char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1198  sycl::detail::pi::PiEvent *OutEvent,
1199  const detail::EventImplPtr &OutEventImpl) {
1200  assert(!Queue->getContextImplPtr()->is_host() &&
1201  "Host queue not supported in fill_2d_usm.");
1202 
1203  if (Width == 0 || Height == 0) {
1204  // no-op, but ensure DepEvents will still be waited on
1205  if (!DepEvents.empty()) {
1206  if (OutEventImpl != nullptr)
1207  OutEventImpl->setHostEnqueueTime();
1208  Queue->getPlugin()->call<PiApiKind::piEnqueueEventsWait>(
1209  Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent);
1210  }
1211  return;
1212  }
1213 
1214  if (!DstMem)
1215  throw sycl::exception(
1216  sycl::make_error_code(errc::invalid),
1217  "NULL pointer argument in 2D memory memset operation.");
1218  if (OutEventImpl != nullptr)
1219  OutEventImpl->setHostEnqueueTime();
1220  const PluginPtr &Plugin = Queue->getPlugin();
1221  Plugin->call<PiApiKind::piextUSMEnqueueMemset2D>(
1222  Queue->getHandleRef(), DstMem, Pitch, static_cast<int>(Value), Width,
1223  Height, DepEvents.size(), DepEvents.data(), OutEvent);
1224 }
1225 
1226 // TODO: This function will remain until ABI-breaking change
1227 void MemoryManager::memset_2d_usm(
1228  void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height,
1229  char Value, std::vector<sycl::detail::pi::PiEvent> DepEvents,
1230  sycl::detail::pi::PiEvent *OutEvent) {
1231  MemoryManager::memset_2d_usm(DstMem, Queue, Pitch, Width, Height, Value,
1232  DepEvents, OutEvent, nullptr);
1233 }
1234 
1235 static void
1237  DeviceGlobalMapEntry *DeviceGlobalEntry,
1238  size_t NumBytes, size_t Offset, const void *Src,
1239  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1240  sycl::detail::pi::PiEvent *OutEvent,
1241  const detail::EventImplPtr &OutEventImpl) {
1242  // Get or allocate USM memory for the device_global.
1243  DeviceGlobalUSMMem &DeviceGlobalUSM =
1244  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1245  void *Dest = DeviceGlobalUSM.getPtr();
1246 
1247  // OwnedPiEvent will keep the initialization event alive for the duration
1248  // of this function call.
1249  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1250 
1251  // We may need addtional events, so create a non-const dependency events list
1252  // to use if we need to modify it.
1253  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1254  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1255  ZIEvent ? AuxDepEventsStorage : DepEvents;
1256 
1257  // If there is a zero-initializer event the memory operation should wait for
1258  // it.
1259  if (ZIEvent) {
1260  AuxDepEventsStorage = DepEvents;
1261  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1262  }
1263 
1264  MemoryManager::copy_usm(Src, Queue, NumBytes,
1265  reinterpret_cast<char *>(Dest) + Offset,
1266  ActualDepEvents, OutEvent, OutEventImpl);
1267 }
1268 
1270  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1271  size_t NumBytes, size_t Offset, void *Dest,
1272  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1273  sycl::detail::pi::PiEvent *OutEvent,
1274  const detail::EventImplPtr &OutEventImpl) {
1275  // Get or allocate USM memory for the device_global. Since we are reading from
1276  // it, we need it initialized if it has not been yet.
1277  DeviceGlobalUSMMem &DeviceGlobalUSM =
1278  DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue);
1279  void *Src = DeviceGlobalUSM.getPtr();
1280 
1281  // OwnedPiEvent will keep the initialization event alive for the duration
1282  // of this function call.
1283  OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin());
1284 
1285  // We may need addtional events, so create a non-const dependency events list
1286  // to use if we need to modify it.
1287  std::vector<sycl::detail::pi::PiEvent> AuxDepEventsStorage;
1288  const std::vector<sycl::detail::pi::PiEvent> &ActualDepEvents =
1289  ZIEvent ? AuxDepEventsStorage : DepEvents;
1290 
1291  // If there is a zero-initializer event the memory operation should wait for
1292  // it.
1293  if (ZIEvent) {
1294  AuxDepEventsStorage = DepEvents;
1295  AuxDepEventsStorage.push_back(ZIEvent.GetEvent());
1296  }
1297 
1298  MemoryManager::copy_usm(reinterpret_cast<const char *>(Src) + Offset, Queue,
1299  NumBytes, Dest, ActualDepEvents, OutEvent,
1300  OutEventImpl);
1301 }
1302 
1305  DeviceGlobalMapEntry *DeviceGlobalEntry) {
1306  assert(DeviceGlobalEntry->MIsDeviceImageScopeDecorated &&
1307  "device_global is not device image scope decorated.");
1308 
1309  // If the device global is used in multiple device images we cannot proceed.
1310  if (DeviceGlobalEntry->MImageIdentifiers.size() > 1)
1311  throw sycl::exception(make_error_code(errc::invalid),
1312  "More than one image exists with the device_global.");
1313 
1314  // If there are no kernels using the device_global we cannot proceed.
1315  if (DeviceGlobalEntry->MImageIdentifiers.size() == 0)
1316  throw sycl::exception(make_error_code(errc::invalid),
1317  "No image exists with the device_global.");
1318 
1319  // Look for cached programs with the device_global.
1320  device Device = Queue->get_device();
1321  ContextImplPtr ContextImpl = Queue->getContextImplPtr();
1322  std::optional<sycl::detail::pi::PiProgram> CachedProgram =
1323  ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry);
1324  if (CachedProgram)
1325  return *CachedProgram;
1326 
1327  // If there was no cached program, build one.
1328  auto Context = createSyclObjFromImpl<context>(ContextImpl);
1329  ProgramManager &PM = ProgramManager::getInstance();
1330  RTDeviceBinaryImage &Img =
1331  PM.getDeviceImage(DeviceGlobalEntry->MImages, Context, Device);
1332  device_image_plain DeviceImage =
1333  PM.getDeviceImageFromBinaryImage(&Img, Context, Device);
1334  device_image_plain BuiltImage = PM.build(DeviceImage, {Device}, {});
1335  return getSyclObjImpl(BuiltImage)->get_program_ref();
1336 }
1337 
1339  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1340  size_t NumBytes, size_t Offset, const void *Src,
1341  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1342  sycl::detail::pi::PiEvent *OutEvent) {
1343  sycl::detail::pi::PiProgram Program =
1344  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1345  const PluginPtr &Plugin = Queue->getPlugin();
1347  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1348  false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(),
1349  OutEvent);
1350 }
1351 
1353  QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry,
1354  size_t NumBytes, size_t Offset, void *Dest,
1355  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1356  sycl::detail::pi::PiEvent *OutEvent) {
1357  sycl::detail::pi::PiProgram Program =
1358  getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry);
1359  const PluginPtr &Plugin = Queue->getPlugin();
1361  Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(),
1362  false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(),
1363  OutEvent);
1364 }
1365 
1366 void MemoryManager::copy_to_device_global(
1367  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1368  size_t NumBytes, size_t Offset, const void *SrcMem,
1369  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1370  sycl::detail::pi::PiEvent *OutEvent,
1371  const detail::EventImplPtr &OutEventImpl) {
1372  DeviceGlobalMapEntry *DGEntry =
1373  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1374  DeviceGlobalPtr);
1375  assert(DGEntry &&
1376  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1377  "Invalid copy operation for device_global.");
1378  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1379  "Copy to device_global is out of bounds.");
1380 
1381  if (IsDeviceImageScoped)
1382  memcpyToDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, SrcMem,
1383  DepEvents, OutEvent);
1384  else
1385  memcpyToDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, SrcMem, DepEvents,
1386  OutEvent, OutEventImpl);
1387 }
1388 
1389 // TODO: This function will remain until ABI-breaking change
1390 void MemoryManager::copy_to_device_global(
1391  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1392  size_t NumBytes, size_t Offset, const void *SrcMem,
1393  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1394  sycl::detail::pi::PiEvent *OutEvent) {
1395  copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1396  Offset, SrcMem, DepEvents, OutEvent, nullptr);
1397 }
1398 
1399 void MemoryManager::copy_from_device_global(
1400  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1401  size_t NumBytes, size_t Offset, void *DstMem,
1402  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1403  sycl::detail::pi::PiEvent *OutEvent,
1404  const detail::EventImplPtr &OutEventImpl) {
1405  DeviceGlobalMapEntry *DGEntry =
1406  detail::ProgramManager::getInstance().getDeviceGlobalEntry(
1407  DeviceGlobalPtr);
1408  assert(DGEntry &&
1409  DGEntry->MIsDeviceImageScopeDecorated == IsDeviceImageScoped &&
1410  "Invalid copy operation for device_global.");
1411  assert(DGEntry->MDeviceGlobalTSize >= Offset + NumBytes &&
1412  "Copy from device_global is out of bounds.");
1413 
1414  if (IsDeviceImageScoped)
1415  memcpyFromDeviceGlobalDirect(Queue, DGEntry, NumBytes, Offset, DstMem,
1416  DepEvents, OutEvent);
1417  else
1418  memcpyFromDeviceGlobalUSM(Queue, DGEntry, NumBytes, Offset, DstMem,
1419  DepEvents, OutEvent, OutEventImpl);
1420 }
1421 
1422 // TODO: This function will remain until ABI-breaking change
1423 void MemoryManager::copy_from_device_global(
1424  const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue,
1425  size_t NumBytes, size_t Offset, void *DstMem,
1426  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1427  sycl::detail::pi::PiEvent *OutEvent) {
1428  copy_from_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes,
1429  Offset, DstMem, DepEvents, OutEvent, nullptr);
1430 }
1431 
1432 // Command buffer methods
1433 void MemoryManager::ext_oneapi_copyD2D_cmd_buffer(
1435  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1436  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1437  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1438  unsigned int SrcElemSize, void *DstMem, unsigned int DimDst,
1439  sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1440  sycl::id<3> DstOffset, unsigned int DstElemSize,
1441  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1442  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1443  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1444  (void)DstAccessRange;
1445 
1446  const PluginPtr &Plugin = Context->getPlugin();
1447 
1448  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1449  TermPositions SrcPos, DstPos;
1450  prepTermPositions(SrcPos, DimSrc, MemType);
1451  prepTermPositions(DstPos, DimDst, MemType);
1452 
1453  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1454  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1455  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1456  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1457  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1458 
1459  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1460  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1461  "Images are not supported in Graphs");
1462  }
1463 
1464  if (1 == DimDst && 1 == DimSrc) {
1466  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1467  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), SrcXOffBytes,
1468  DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(),
1469  OutSyncPoint);
1470  } else {
1471  // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
1472  // calculate both src and dest pitch using region[0], which is not correct
1473  // if src and dest are not the same size.
1474  size_t SrcRowPitch = SrcSzWidthBytes;
1475  size_t SrcSlicePitch = (DimSrc <= 1)
1476  ? SrcSzWidthBytes
1477  : SrcSzWidthBytes * SrcSize[SrcPos.YTerm];
1478  size_t DstRowPitch = DstSzWidthBytes;
1479  size_t DstSlicePitch = (DimDst <= 1)
1480  ? DstSzWidthBytes
1481  : DstSzWidthBytes * DstSize[DstPos.YTerm];
1482 
1483  pi_buff_rect_offset_struct SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1484  SrcOffset[SrcPos.ZTerm]};
1485  pi_buff_rect_offset_struct DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm],
1486  DstOffset[DstPos.ZTerm]};
1487  pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
1488  SrcAccessRange[SrcPos.YTerm],
1489  SrcAccessRange[SrcPos.ZTerm]};
1490 
1492  CommandBuffer, sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1493  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem), &SrcOrigin,
1494  &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch,
1495  DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint);
1496  }
1497 }
1498 
1499 void MemoryManager::ext_oneapi_copyD2H_cmd_buffer(
1501  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1502  void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1503  sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
1504  unsigned int SrcElemSize, char *DstMem, unsigned int DimDst,
1505  sycl::range<3> DstSize, sycl::id<3> DstOffset, unsigned int DstElemSize,
1506  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1507  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1508  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1509 
1510  const PluginPtr &Plugin = Context->getPlugin();
1511 
1512  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1513  TermPositions SrcPos, DstPos;
1514  prepTermPositions(SrcPos, DimSrc, MemType);
1515  prepTermPositions(DstPos, DimDst, MemType);
1516 
1517  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1518  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1519  size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm] * SrcElemSize;
1520  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1521  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1522 
1523  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1524  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1525  "Images are not supported in Graphs");
1526  }
1527 
1528  if (1 == DimDst && 1 == DimSrc) {
1529  pi_result Result =
1530  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferRead>(
1531  CommandBuffer,
1532  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1533  SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes,
1534  Deps.size(), Deps.data(), OutSyncPoint);
1535 
1536  if (Result == PI_ERROR_INVALID_OPERATION) {
1537  throw sycl::exception(
1538  sycl::make_error_code(sycl::errc::feature_not_supported),
1539  "Device-to-host buffer copy command not supported by graph backend");
1540  } else {
1541  Plugin->checkPiResult(Result);
1542  }
1543  } else {
1544  size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1545  size_t BufferSlicePitch =
1546  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1547  size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1548  size_t HostSlicePitch =
1549  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1550 
1551  pi_buff_rect_offset_struct BufferOffset{
1552  SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]};
1553  pi_buff_rect_offset_struct HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm],
1554  DstOffset[DstPos.ZTerm]};
1555  pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
1556  SrcAccessRange[SrcPos.YTerm],
1557  SrcAccessRange[SrcPos.ZTerm]};
1558 
1559  pi_result Result =
1560  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferReadRect>(
1561  CommandBuffer,
1562  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(SrcMem),
1563  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1564  BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(),
1565  Deps.data(), OutSyncPoint);
1566  if (Result == PI_ERROR_INVALID_OPERATION) {
1567  throw sycl::exception(
1568  sycl::make_error_code(sycl::errc::feature_not_supported),
1569  "Device-to-host buffer copy command not supported by graph backend");
1570  } else {
1571  Plugin->checkPiResult(Result);
1572  }
1573  }
1574 }
1575 
1576 void MemoryManager::ext_oneapi_copyH2D_cmd_buffer(
1578  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1579  char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize,
1580  sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem,
1581  unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
1582  sycl::id<3> DstOffset, unsigned int DstElemSize,
1583  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1584  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1585  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1586 
1587  const PluginPtr &Plugin = Context->getPlugin();
1588 
1589  detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType();
1590  TermPositions SrcPos, DstPos;
1591  prepTermPositions(SrcPos, DimSrc, MemType);
1592  prepTermPositions(DstPos, DimDst, MemType);
1593 
1594  size_t DstXOffBytes = DstOffset[DstPos.XTerm] * DstElemSize;
1595  size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm] * SrcElemSize;
1596  size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm] * DstElemSize;
1597  size_t DstSzWidthBytes = DstSize[DstPos.XTerm] * DstElemSize;
1598  size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm] * SrcElemSize;
1599 
1600  if (MemType != detail::SYCLMemObjI::MemObjType::Buffer) {
1601  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1602  "Images are not supported in Graphs");
1603  }
1604 
1605  if (1 == DimDst && 1 == DimSrc) {
1606  pi_result Result =
1607  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemBufferWrite>(
1608  CommandBuffer,
1609  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1610  DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes,
1611  Deps.size(), Deps.data(), OutSyncPoint);
1612 
1613  if (Result == PI_ERROR_INVALID_OPERATION) {
1614  throw sycl::exception(
1615  sycl::make_error_code(sycl::errc::feature_not_supported),
1616  "Host-to-device buffer copy command not supported by graph backend");
1617  } else {
1618  Plugin->checkPiResult(Result);
1619  }
1620  } else {
1621  size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
1622  size_t BufferSlicePitch =
1623  (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0;
1624  size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
1625  size_t HostSlicePitch =
1626  (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0;
1627 
1628  pi_buff_rect_offset_struct BufferOffset{
1629  DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]};
1630  pi_buff_rect_offset_struct HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm],
1631  SrcOffset[SrcPos.ZTerm]};
1632  pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
1633  DstAccessRange[DstPos.YTerm],
1634  DstAccessRange[DstPos.ZTerm]};
1635 
1636  pi_result Result =
1638  CommandBuffer,
1639  sycl::detail::pi::cast<sycl::detail::pi::PiMem>(DstMem),
1640  &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch,
1641  BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(),
1642  Deps.data(), OutSyncPoint);
1643 
1644  if (Result == PI_ERROR_INVALID_OPERATION) {
1645  throw sycl::exception(
1646  sycl::make_error_code(sycl::errc::feature_not_supported),
1647  "Host-to-device buffer copy command not supported by graph backend");
1648  } else {
1649  Plugin->checkPiResult(Result);
1650  }
1651  }
1652 }
1653 
1654 void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
1655  ContextImplPtr Context, const void *SrcMem,
1656  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, size_t Len,
1657  void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1658  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1659  if (!SrcMem || !DstMem)
1660  throw runtime_error("NULL pointer argument in memory copy operation.",
1661  PI_ERROR_INVALID_VALUE);
1662 
1663  const PluginPtr &Plugin = Context->getPlugin();
1664  pi_result Result =
1665  Plugin->call_nocheck<PiApiKind::piextCommandBufferMemcpyUSM>(
1666  CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(),
1667  OutSyncPoint);
1668  if (Result == PI_ERROR_INVALID_OPERATION) {
1669  throw sycl::exception(
1670  sycl::make_error_code(sycl::errc::feature_not_supported),
1671  "USM copy command not supported by graph backend");
1672  } else {
1673  Plugin->checkPiResult(Result);
1674  }
1675 }
1676 
1677 void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
1679  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem,
1680  size_t Len, int Pattern, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1681  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1682 
1683  if (!DstMem)
1684  throw runtime_error("NULL pointer argument in memory fill operation.",
1685  PI_ERROR_INVALID_VALUE);
1686 
1687  const PluginPtr &Plugin = Context->getPlugin();
1688  // Pattern is interpreted as an unsigned char so pattern size is always 1.
1689  size_t PatternSize = 1;
1691  CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(),
1692  Deps.data(), OutSyncPoint);
1693 }
1694 
1695 void MemoryManager::ext_oneapi_fill_cmd_buffer(
1697  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
1698  void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim,
1699  sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
1700  unsigned int ElementSize,
1701  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1702  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1703  assert(SYCLMemObj && "The SYCLMemObj is nullptr");
1704 
1705  const PluginPtr &Plugin = Context->getPlugin();
1706  if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
1707  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
1708  "Images are not supported in Graphs");
1709  }
1710 
1711  // 2D and 3D buffers accessors can't have custom range or the data will
1712  // likely be discontiguous.
1713  bool RangesUsable = (Dim <= 1) || (Size == AccessRange);
1714  // For 2D and 3D buffers, the offset must be 0, or the data will be
1715  // discontiguous.
1716  bool OffsetUsable = (Dim <= 1) || (AccessOffset == sycl::id<3>{0, 0, 0});
1717  size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2];
1718 
1719  if (RangesUsable && OffsetUsable) {
1721  CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
1722  PatternSize, AccessOffset[0] * ElementSize,
1723  RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
1724  return;
1725  }
1726  // The sycl::handler uses a parallel_for kernel in the case of unusable
1727  // Range or Offset, not CG:Fill. So we should not be here.
1728  throw runtime_error("Not supported configuration of fill requested",
1729  PI_ERROR_INVALID_OPERATION);
1730 }
1731 
1732 void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer(
1734  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem,
1735  size_t Length, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1736  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1737  assert(!Context->is_host() && "Host queue not supported in prefetch_usm.");
1738 
1739  const PluginPtr &Plugin = Context->getPlugin();
1741  CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(),
1742  Deps.data(), OutSyncPoint);
1743 }
1744 
1745 void MemoryManager::ext_oneapi_advise_usm_cmd_buffer(
1747  sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const void *Mem,
1748  size_t Length, pi_mem_advice Advice,
1749  std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
1750  sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
1751  assert(!Context->is_host() && "Host queue not supported in advise_usm.");
1752 
1753  const PluginPtr &Plugin = Context->getPlugin();
1755  CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(),
1756  OutSyncPoint);
1757 }
1758 
1759 void MemoryManager::copy_image_bindless(
1760  void *Src, QueueImplPtr Queue, void *Dst,
1762  const sycl::detail::pi::PiMemImageFormat &Format,
1768  const std::vector<sycl::detail::pi::PiEvent> &DepEvents,
1769  sycl::detail::pi::PiEvent *OutEvent) {
1770 
1771  assert(!Queue->getContextImplPtr()->is_host() &&
1772  "Host queue not supported in copy_image_bindless.");
1773  assert((Flags == (sycl::detail::pi::PiImageCopyFlags)
1777  "Invalid flags passed to copy_image_bindless.");
1778  if (!Dst || !Src)
1779  throw sycl::exception(
1780  sycl::make_error_code(errc::invalid),
1781  "NULL pointer argument in bindless image copy operation.");
1782 
1783  const detail::PluginPtr &Plugin = Queue->getPlugin();
1784  Plugin->call<PiApiKind::piextMemImageCopy>(
1785  Queue->getHandleRef(), Dst, Src, &Format, &Desc, Flags, &SrcOffset,
1786  &DstOffset, &CopyExtent, &HostExtent, DepEvents.size(), DepEvents.data(),
1787  OutEvent);
1788 }
1789 
1790 } // namespace detail
1791 } // namespace _V1
1792 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
static void * allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range< 3 > Range, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const sycl::detail::pi::PiMemImageDesc &Desc, const sycl::detail::pi::PiMemImageFormat &Format, const sycl::property_list &PropsList)
static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void * allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList)
static void * allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector< EventImplPtr > DepEvents, sycl::detail::pi::PiEvent &OutEvent)
static void * allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList)
static void * allocateMemBuffer(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait)
static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
virtual void * allocateHostMem()=0
virtual void releaseMem(ContextImplPtr Context, void *Ptr)=0
virtual MemObjType getType() const =0
virtual void releaseHostMem(void *Ptr)=0
virtual void * allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &InteropEvent)=0
static void bufferAssociateNotification(const void *, const void *)
static void bufferReleaseNotification(const void *, const void *)
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:65
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
size_t size() const
Definition: range.hpp:56
::pi_mem_flags PiMemFlags
Definition: pi.hpp:142
::pi_ext_sync_point PiExtSyncPoint
Definition: pi.hpp:156
static void memcpyToDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone)
static void memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
void prepTermPositions(TermPositions &pos, int Dimensions, detail::SYCLMemObjI::MemObjType Type)
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent >, sycl::detail::pi::PiEvent &, const detail::EventImplPtr &)
void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static void waitForEvents(const std::vector< EventImplPtr > &Events)
void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
uint64_t emitMemReleaseBeginTrace(uintptr_t ObjHandle, uintptr_t AllocPtr)
static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static sycl::detail::pi::PiProgram getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry)
void emitMemAllocEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, size_t AllocSize, size_t GuardZone, uint64_t CorrelationID)
void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr, uint64_t CorrelationID)
void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 >, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
static void memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent)
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range< 3 > SrcSize, sycl::range< 3 > SrcAccessRange, sycl::id< 3 > SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range< 3 > DstSize, sycl::range< 3 > DstAccessRange, sycl::id< 3 > DstOffset, unsigned int DstElemSize, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl)
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:130
static sycl::detail::pi::PiMemFlags getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly)
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:3233
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:574
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Definition: accessor.hpp:3233
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:107
Definition: access.hpp:18
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Set pattern to the first count elements of type T starting from dev_ptr.
Definition: memory.hpp:172
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_cuda.cpp:896
pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_cuda.cpp:236
constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY
Definition: pi.h:734
uintptr_t pi_native_handle
Definition: pi.h:206
pi_bitfield pi_map_flags
Definition: pi.h:741
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:737
_pi_result
Definition: pi.h:213
pi_uint32 pi_bool
Definition: pi.h:204
pi_result piextMemImageCopy(pi_queue command_queue, void *dst_ptr, void *src_ptr, const pi_image_format *image_format, const pi_image_desc *image_desc, const pi_image_copy_flags flags, pi_image_offset src_offset, pi_image_offset dst_offset, pi_image_region copy_extent, pi_image_region host_extent, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to copy image data Host to Device or Device to Host.
Definition: pi_cuda.cpp:427
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_cuda.cpp:925
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC
Definition: pi.h:738
_pi_mem_advice
Definition: pi.h:579
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:789
pi_bitfield pi_mem_properties
Definition: pi.h:747
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:674
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1097
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_cuda.cpp:227
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_cuda.cpp:915
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable.
Definition: pi_cuda.cpp:984
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:657
_pi_usm_migration_flags
Definition: pi.h:1944
_pi_image_copy_flags
Definition: pi.h:656
const pi_bool PI_FALSE
Definition: pi.h:665
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1128
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
Definition: pi_cuda.cpp:1119
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
Definition: pi_cuda.cpp:945
pi_result piextCommandBufferMemBufferFill(pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer fill command to the command-buffer.
Definition: pi_cuda.cpp:1163
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:700
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:726
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:712
@ PI_BUFFER_CREATE_TYPE_REGION
Definition: pi.h:662
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:773
uint32_t pi_uint32
Definition: pi.h:202
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:598
pi_bitfield pi_mem_flags
Definition: pi.h:731
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_cuda.cpp:762
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D fill API.
Definition: pi_cuda.cpp:932
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
Definition: pi_cuda.cpp:1107
pi_result piMemRelease(pi_mem mem)
Definition: pi_cuda.cpp:225
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:736
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:743
constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION
Definition: pi.h:744
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:815
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
Definition: pi_cuda.cpp:1088
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:742
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:750
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_cuda.cpp:836
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1141
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:733
pi_result piextCommandBufferFillUSM(pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM fill command to the command-buffer.
Definition: pi_cuda.cpp:1173
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:801
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:474
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
Definition: pi_cuda.cpp:210
pi_result piextCommandBufferAdviseUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t length, pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Advise command to the command-buffer.
Definition: pi_cuda.cpp:1193
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
Definition: pi_cuda.cpp:957
constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION
Definition: pi.h:749
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:824
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_cuda.cpp:904
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:686
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:124
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
Definition: pi_cuda.cpp:993
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
Definition: pi_cuda.cpp:1150
pi_result piextCommandBufferPrefetchUSM(pi_ext_command_buffer command_buffer, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM Prefetch command to the command-buffer.
Definition: pi_cuda.cpp:1184
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:615
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL
Definition: pi.h:748
DeviceGlobalUSMMem & getOrAllocateDeviceGlobalUSM(const std::shared_ptr< queue_impl > &QueueImpl)
std::unordered_set< RTDeviceBinaryImage * > MImages
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
sycl::detail::pi::PiEvent GetEvent()
Definition: pi_utils.hpp:50