DPC++ Runtime
Runtime libraries for oneAPI DPC++
usm_impl.cpp
Go to the documentation of this file.
1 //==---------------- usm_impl.cpp - USM API Utils -------------*- C++ -*---==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // ===--------------------------------------------------------------------=== //
8 
#include <detail/queue_impl.hpp>
#include <detail/usm/usm_impl.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/aligned_allocator.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/detail/pi.hpp>
#include <sycl/device.hpp>
#include <sycl/ext/intel/experimental/usm_properties.hpp>
#include <sycl/usm.hpp>

#include <array>
#include <cassert>
#include <cstdlib>
#include <memory>
23 
24 #ifdef XPTI_ENABLE_INSTRUMENTATION
25 // Include the headers necessary for emitting
26 // traces using the trace framework
27 #include "xpti/xpti_trace_framework.hpp"
28 #include <detail/xpti_registry.hpp>
29 #endif
30 
31 namespace sycl {
32 inline namespace _V1 {
33 
35 
36 namespace detail {
37 #ifdef XPTI_ENABLE_INSTRUMENTATION
38 extern xpti::trace_event_data_t *GSYCLGraphEvent;
39 #endif
40 namespace usm {
41 
42 void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt,
43  alloc Kind, const property_list &PropList,
44  const detail::code_location &CodeLoc) {
45 #ifdef XPTI_ENABLE_INSTRUMENTATION
46  // Stash the code location information and propagate
47  detail::tls_code_loc_t CL(CodeLoc);
48  XPTIScope PrepareNotify((void *)alignedAllocHost,
49  (uint16_t)xpti::trace_point_type_t::node_create,
50  SYCL_MEM_ALLOC_STREAM_NAME, "malloc_host");
51  PrepareNotify.addMetadata([&](auto TEvent) {
52  xpti::addMetadata(TEvent, "sycl_device_name", std::string("Host"));
53  xpti::addMetadata(TEvent, "sycl_device", 0);
54  xpti::addMetadata(TEvent, "memory_size", Size);
55  });
56  // Notify XPTI about the memset submission
57  PrepareNotify.notify();
58  // Emit a begin/end scope for this call
59  PrepareNotify.scopedNotify(
60  (uint16_t)xpti::trace_point_type_t::mem_alloc_begin);
61 #endif
62  void *RetVal = nullptr;
63  if (Size == 0)
64  return nullptr;
65 
66  std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
67  if (CtxImpl->is_host()) {
68  if (!Alignment) {
69  // worst case default
70  Alignment = 128;
71  }
72 
74  try {
75  RetVal = Alloc.allocate(Size);
76  } catch (const std::bad_alloc &) {
77  // Conform with Specification behavior
78  RetVal = nullptr;
79  }
80  } else {
81  pi_context C = CtxImpl->getHandleRef();
82  const PluginPtr &Plugin = CtxImpl->getPlugin();
83  pi_result Error;
84 
85  switch (Kind) {
86  case alloc::host: {
87  std::array<pi_usm_mem_properties, 3> Props;
88  auto PropsIter = Props.begin();
89 
90  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
91  buffer_location>() &&
92  Ctxt.get_platform().has_extension(
93  "cl_intel_mem_alloc_buffer_location")) {
94  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
95  *PropsIter++ = PropList
98  .get_buffer_location();
99  }
100 
101  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
102  *PropsIter++ = 0; // null-terminate property list
103 
104  Error = Plugin->call_nocheck<PiApiKind::piextUSMHostAlloc>(
105  &RetVal, C, Props.data(), Size, Alignment);
106 
107  break;
108  }
109  case alloc::device:
110  case alloc::shared:
111  case alloc::unknown: {
112  RetVal = nullptr;
113  Error = PI_ERROR_INVALID_VALUE;
114  break;
115  }
116  }
117 
118  // Error is for debugging purposes.
119  // The spec wants a nullptr returned, not an exception.
120  if (Error != PI_SUCCESS)
121  return nullptr;
122  }
123 #ifdef XPTI_ENABLE_INSTRUMENTATION
124  xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr",
125  reinterpret_cast<size_t>(RetVal));
126 #endif
127  return RetVal;
128 }
129 
130 void *alignedAllocInternal(size_t Alignment, size_t Size,
131  const context_impl *CtxImpl,
132  const device_impl *DevImpl, alloc Kind,
133  const property_list &PropList) {
134  void *RetVal = nullptr;
135  if (Size == 0)
136  return nullptr;
137 
138  if (CtxImpl->is_host()) {
139  if (Kind == alloc::unknown) {
140  RetVal = nullptr;
141  } else {
142  if (!Alignment) {
143  // worst case default
144  Alignment = 128;
145  }
146 
148  try {
149  RetVal = Alloc.allocate(Size);
150  } catch (const std::bad_alloc &) {
151  // Conform with Specification behavior
152  RetVal = nullptr;
153  }
154  }
155  } else {
156  pi_context C = CtxImpl->getHandleRef();
157  const PluginPtr &Plugin = CtxImpl->getPlugin();
158  pi_result Error;
159  pi_device Id;
160 
161  switch (Kind) {
162  case alloc::device: {
163  Id = DevImpl->getHandleRef();
164 
165  std::array<pi_usm_mem_properties, 3> Props;
166  auto PropsIter = Props.begin();
167 
168  // Buffer location is only supported on FPGA devices
169  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
170  buffer_location>() &&
171  DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) {
172  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
173  *PropsIter++ = PropList
176  .get_buffer_location();
177  }
178 
179  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
180  *PropsIter++ = 0; // null-terminate property list
181 
182  Error = Plugin->call_nocheck<PiApiKind::piextUSMDeviceAlloc>(
183  &RetVal, C, Id, Props.data(), Size, Alignment);
184 
185  break;
186  }
187  case alloc::shared: {
188  Id = DevImpl->getHandleRef();
189 
190  std::array<pi_usm_mem_properties, 5> Props;
191  auto PropsIter = Props.begin();
192 
193  if (PropList.has_property<
195  *PropsIter++ = PI_MEM_ALLOC_FLAGS;
196  *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY;
197  }
198 
199  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
200  buffer_location>() &&
201  DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) {
202  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
203  *PropsIter++ = PropList
206  .get_buffer_location();
207  }
208 
209  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
210  *PropsIter++ = 0; // null-terminate property list
211 
212  Error = Plugin->call_nocheck<PiApiKind::piextUSMSharedAlloc>(
213  &RetVal, C, Id, Props.data(), Size, Alignment);
214 
215  break;
216  }
217  case alloc::host:
218  case alloc::unknown: {
219  RetVal = nullptr;
220  Error = PI_ERROR_INVALID_VALUE;
221  break;
222  }
223  }
224 
225  // Error is for debugging purposes.
226  // The spec wants a nullptr returned, not an exception.
227  if (Error != PI_SUCCESS)
228  return nullptr;
229  }
230  return RetVal;
231 }
232 
233 void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
234  const device &Dev, alloc Kind, const property_list &PropList,
235  const detail::code_location &CodeLoc) {
236 #ifdef XPTI_ENABLE_INSTRUMENTATION
237  // Stash the code location information and propagate
238  detail::tls_code_loc_t CL(CodeLoc);
239  XPTIScope PrepareNotify((void *)alignedAlloc,
240  (uint16_t)xpti::trace_point_type_t::node_create,
241  SYCL_MEM_ALLOC_STREAM_NAME, "usm::alignedAlloc");
242  PrepareNotify.addMetadata([&](auto TEvent) {
243  xpti::addMetadata(TEvent, "sycl_device_name",
244  Dev.get_info<info::device::name>());
245  // Need to determine how to get the device handle reference
246  // xpti::addMetadata(TEvent, "sycl_device", Dev.getHandleRef()));
247  xpti::addMetadata(TEvent, "memory_size", Size);
248  });
249  // Notify XPTI about the memset submission
250  PrepareNotify.notify();
251  // Emit a begin/end scope for this call
252  PrepareNotify.scopedNotify(
253  (uint16_t)xpti::trace_point_type_t::mem_alloc_begin);
254 #endif
255  void *RetVal =
257  getSyclObjImpl(Dev).get(), Kind, PropList);
258 #ifdef XPTI_ENABLE_INSTRUMENTATION
259  xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr",
260  reinterpret_cast<size_t>(RetVal));
261 #endif
262  return RetVal;
263 }
264 
265 void freeInternal(void *Ptr, const context_impl *CtxImpl) {
266  if (Ptr == nullptr)
267  return;
268  if (CtxImpl->is_host()) {
269  // need to use alignedFree here for Windows
271  } else {
272  pi_context C = CtxImpl->getHandleRef();
273  const PluginPtr &Plugin = CtxImpl->getPlugin();
274  Plugin->call<PiApiKind::piextUSMFree>(C, Ptr);
275  }
276 }
277 
278 void free(void *Ptr, const context &Ctxt,
279  const detail::code_location &CodeLoc) {
280 #ifdef XPTI_ENABLE_INSTRUMENTATION
281  // Stash the code location information and propagate
282  detail::tls_code_loc_t CL(CodeLoc);
283  XPTIScope PrepareNotify((void *)free,
284  (uint16_t)xpti::trace_point_type_t::node_create,
285  SYCL_MEM_ALLOC_STREAM_NAME, "usm::free");
286  PrepareNotify.addMetadata([&](auto TEvent) {
287  xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
288  });
289  // Notify XPTI about the memset submission
290  PrepareNotify.notify();
291  // Emit a begin/end scope for this call
292  PrepareNotify.scopedNotify(
293  (uint16_t)xpti::trace_point_type_t::mem_release_begin);
294 #endif
296 }
297 
298 } // namespace usm
299 } // namespace detail
300 
301 void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
302  const detail::code_location &CodeLoc) {
303  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device,
304  property_list{}, CodeLoc);
305 }
306 
307 void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
308  const property_list &PropList,
309  const detail::code_location &CodeLoc) {
310  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, PropList,
311  CodeLoc);
312 }
313 
314 void *malloc_device(size_t Size, const queue &Q,
315  const detail::code_location &CodeLoc) {
316  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
317  alloc::device, property_list{}, CodeLoc);
318 }
319 
320 void *malloc_device(size_t Size, const queue &Q, const property_list &PropList,
321  const detail::code_location &CodeLoc) {
322  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
323  alloc::device, PropList, CodeLoc);
324 }
325 
326 void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev,
327  const context &Ctxt,
328  const detail::code_location &CodeLoc) {
329  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device,
330  property_list{}, CodeLoc);
331 }
332 
333 void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev,
334  const context &Ctxt, const property_list &PropList,
335  const detail::code_location &CodeLoc) {
336  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device,
337  PropList, CodeLoc);
338 }
339 
340 void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q,
341  const detail::code_location &CodeLoc) {
343  Q.get_device(), alloc::device,
344  property_list{}, CodeLoc);
345 }
346 
347 void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q,
348  const property_list &PropList,
349  const detail::code_location &CodeLoc) {
351  Q.get_device(), alloc::device, PropList,
352  CodeLoc);
353 }
354 
355 void free(void *ptr, const context &Ctxt,
356  const detail::code_location &CodeLoc) {
357  return detail::usm::free(ptr, Ctxt, CodeLoc);
358 }
359 
360 void free(void *ptr, const queue &Q, const detail::code_location &CodeLoc) {
361  return detail::usm::free(ptr, Q.get_context(), CodeLoc);
362 }
363 
364 void *malloc_host(size_t Size, const context &Ctxt,
365  const detail::code_location &CodeLoc) {
366  return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host,
367  property_list{}, CodeLoc);
368 }
369 
370 void *malloc_host(size_t Size, const context &Ctxt,
371  const property_list &PropList,
372  const detail::code_location &CodeLoc) {
373  return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host, PropList,
374  CodeLoc);
375 }
376 
377 void *malloc_host(size_t Size, const queue &Q,
378  const detail::code_location &CodeLoc) {
379  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), alloc::host,
380  property_list{}, CodeLoc);
381 }
382 
383 void *malloc_host(size_t Size, const queue &Q, const property_list &PropList,
384  const detail::code_location &CodeLoc) {
385  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), alloc::host,
386  PropList, CodeLoc);
387 }
388 
389 void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt,
390  const detail::code_location &CodeLoc) {
391  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared,
392  property_list{}, CodeLoc);
393 }
394 
395 void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt,
396  const property_list &PropList,
397  const detail::code_location &CodeLoc) {
398  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared, PropList,
399  CodeLoc);
400 }
401 
402 void *malloc_shared(size_t Size, const queue &Q,
403  const detail::code_location &CodeLoc) {
404  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
405  alloc::shared, property_list{}, CodeLoc);
406 }
407 
408 void *malloc_shared(size_t Size, const queue &Q, const property_list &PropList,
409  const detail::code_location &CodeLoc) {
410  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
411  alloc::shared, PropList, CodeLoc);
412 }
413 
414 void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt,
415  const detail::code_location &CodeLoc) {
416  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host,
417  property_list{}, CodeLoc);
418 }
419 
420 void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt,
421  const property_list &PropList,
422  const detail::code_location &CodeLoc) {
423  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host,
424  PropList, CodeLoc);
425 }
426 
427 void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q,
428  const detail::code_location &CodeLoc) {
430  alloc::host, property_list{}, CodeLoc);
431 }
432 
433 void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q,
434  const property_list &PropList,
435  const detail::code_location &CodeLoc) {
437  alloc::host, PropList, CodeLoc);
438 }
439 
440 void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev,
441  const context &Ctxt,
442  const detail::code_location &CodeLoc) {
443  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared,
444  property_list{}, CodeLoc);
445 }
446 
447 void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev,
448  const context &Ctxt, const property_list &PropList,
449  const detail::code_location &CodeLoc) {
450  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared,
451  PropList, CodeLoc);
452 }
453 
454 void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q,
455  const detail::code_location &CodeLoc) {
457  Q.get_device(), alloc::shared,
458  property_list{}, CodeLoc);
459 }
460 
461 void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q,
462  const property_list &PropList,
463  const detail::code_location &CodeLoc) {
465  Q.get_device(), alloc::shared, PropList,
466  CodeLoc);
467 }
468 
469 // single form
470 
471 void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind,
472  const property_list &PropList,
473  const detail::code_location &CodeLoc) {
474  if (Kind == alloc::host)
475  return detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, PropList,
476  CodeLoc);
477  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, PropList, CodeLoc);
478 }
479 
480 void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind,
481  const detail::code_location &CodeLoc) {
482  if (Kind == alloc::host)
483  return detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, property_list{},
484  CodeLoc);
485  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, property_list{},
486  CodeLoc);
487 }
488 
489 void *malloc(size_t Size, const queue &Q, alloc Kind,
490  const detail::code_location &CodeLoc) {
491  if (Kind == alloc::host)
492  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), Kind,
493  property_list{}, CodeLoc);
494  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
495  Kind, property_list{}, CodeLoc);
496 }
497 
498 void *malloc(size_t Size, const queue &Q, alloc Kind,
499  const property_list &PropList,
500  const detail::code_location &CodeLoc) {
501  if (Kind == alloc::host)
502  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), Kind,
503  PropList, CodeLoc);
504  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
505  Kind, PropList, CodeLoc);
506 }
507 
508 void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev,
509  const context &Ctxt, alloc Kind,
510  const detail::code_location &CodeLoc) {
511  if (Kind == alloc::host)
512  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind,
513  property_list{}, CodeLoc);
514 
515  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind,
516  property_list{}, CodeLoc);
517 }
518 
519 void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev,
520  const context &Ctxt, alloc Kind,
521  const property_list &PropList,
522  const detail::code_location &CodeLoc) {
523  if (Kind == alloc::host)
524  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind, PropList,
525  CodeLoc);
526  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind, PropList,
527  CodeLoc);
528 }
529 
530 void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind,
531  const detail::code_location &CodeLoc) {
532  if (Kind == alloc::host)
533  return detail::usm::alignedAllocHost(Alignment, Size, Q.get_context(), Kind,
534  property_list{}, CodeLoc);
536  Q.get_device(), Kind, property_list{},
537  CodeLoc);
538 }
539 
540 void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind,
541  const property_list &PropList,
542  const detail::code_location &CodeLoc) {
543  if (Kind == alloc::host)
544  return detail::usm::alignedAllocHost(Alignment, Size, Q.get_context(), Kind,
545  PropList, CodeLoc);
547  Q.get_device(), Kind, PropList, CodeLoc);
548 }
549 
550 // Pointer queries
556 alloc get_pointer_type(const void *Ptr, const context &Ctxt) {
557  if (!Ptr)
558  return alloc::unknown;
559 
560  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
561 
562  // Everything on a host device is just system malloc so call it host
563  if (CtxImpl->is_host())
564  return alloc::host;
565 
566  pi_context PICtx = CtxImpl->getHandleRef();
567  pi_usm_type AllocTy;
568 
569  // query type using PI function
570  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
572  Plugin->call_nocheck<detail::PiApiKind::piextUSMGetMemAllocInfo>(
573  PICtx, Ptr, PI_MEM_ALLOC_TYPE, sizeof(pi_usm_type), &AllocTy,
574  nullptr);
575 
576  // PI_ERROR_INVALID_VALUE means USM doesn't know about this ptr
577  if (Err == PI_ERROR_INVALID_VALUE)
578  return alloc::unknown;
579  // otherwise PI_SUCCESS is expected
580  if (Err != PI_SUCCESS) {
581  Plugin->reportPiError(Err, "get_pointer_type()");
582  }
583 
584  alloc ResultAlloc;
585  switch (AllocTy) {
586  case PI_MEM_TYPE_HOST:
587  ResultAlloc = alloc::host;
588  break;
589  case PI_MEM_TYPE_DEVICE:
590  ResultAlloc = alloc::device;
591  break;
592  case PI_MEM_TYPE_SHARED:
593  ResultAlloc = alloc::shared;
594  break;
595  default:
596  ResultAlloc = alloc::unknown;
597  break;
598  }
599 
600  return ResultAlloc;
601 }
602 
607 device get_pointer_device(const void *Ptr, const context &Ctxt) {
608  // Check if ptr is a valid USM pointer
609  if (get_pointer_type(Ptr, Ctxt) == alloc::unknown)
610  throw runtime_error("Ptr not a valid USM allocation!",
611  PI_ERROR_INVALID_VALUE);
612 
613  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
614 
615  // Just return the host device in the host context
616  if (CtxImpl->is_host())
617  return Ctxt.get_devices()[0];
618 
619  // Check if ptr is a host allocation
620  if (get_pointer_type(Ptr, Ctxt) == alloc::host) {
621  auto Devs = CtxImpl->getDevices();
622  if (Devs.size() == 0)
623  throw runtime_error("No devices in passed context!",
624  PI_ERROR_INVALID_VALUE);
625 
626  // Just return the first device in the context
627  return Devs[0];
628  }
629 
630  pi_context PICtx = CtxImpl->getHandleRef();
631  pi_device DeviceId;
632 
633  // query device using PI function
634  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
636  PICtx, Ptr, PI_MEM_ALLOC_DEVICE, sizeof(pi_device), &DeviceId, nullptr);
637 
638  // The device is not necessarily a member of the context, it could be a
639  // member's descendant instead. Fetch the corresponding device from the cache.
640  std::shared_ptr<detail::platform_impl> PltImpl = CtxImpl->getPlatformImpl();
641  std::shared_ptr<detail::device_impl> DevImpl =
642  PltImpl->getDeviceImpl(DeviceId);
643  if (DevImpl)
644  return detail::createSyclObjFromImpl<device>(DevImpl);
645  throw runtime_error("Cannot find device associated with USM allocation!",
646  PI_ERROR_INVALID_OPERATION);
647 }
648 
649 // Device copy enhancement APIs, prepare_for and release_from USM.
650 
651 static void prepare_for_usm_device_copy(const void *Ptr, size_t Size,
652  const context &Ctxt) {
653  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
654  pi_context PICtx = CtxImpl->getHandleRef();
655  // Call the PI function
656  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
657  Plugin->call<detail::PiApiKind::piextUSMImport>(Ptr, Size, PICtx);
658 }
659 
660 static void release_from_usm_device_copy(const void *Ptr, const context &Ctxt) {
661  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
662  pi_context PICtx = CtxImpl->getHandleRef();
663  // Call the PI function
664  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
665  Plugin->call<detail::PiApiKind::piextUSMRelease>(Ptr, PICtx);
666 }
667 
668 namespace ext::oneapi::experimental {
669 void prepare_for_device_copy(const void *Ptr, size_t Size,
670  const context &Ctxt) {
671  prepare_for_usm_device_copy(Ptr, Size, Ctxt);
672 }
673 
674 void prepare_for_device_copy(const void *Ptr, size_t Size, const queue &Queue) {
675  prepare_for_usm_device_copy(Ptr, Size, Queue.get_context());
676 }
677 
678 void release_from_device_copy(const void *Ptr, const context &Ctxt) {
679  release_from_usm_device_copy(Ptr, Ctxt);
680 }
681 
682 void release_from_device_copy(const void *Ptr, const queue &Queue) {
684 }
685 } // namespace ext::oneapi::experimental
686 
687 } // namespace _V1
688 } // namespace sycl
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:31
piextUSMFree
pi_result piextUSMFree(pi_context context, void *ptr)
Indicates that the allocated USM memory is no longer needed on the runtime side.
Definition: pi_cuda.cpp:863
sycl::_V1::get_pointer_type
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:556
sycl::_V1::get_pointer_device
device get_pointer_device(const void *ptr, const context &ctxt)
Queries the device against which the pointer was allocated Throws an invalid_object_error if ptr is a...
Definition: usm_impl.cpp:607
sycl::_V1::detail::SYCL_MEM_ALLOC_STREAM_NAME
constexpr auto SYCL_MEM_ALLOC_STREAM_NAME
Definition: xpti_registry.hpp:35
_pi_usm_type
_pi_usm_type
Definition: pi.h:1878
device.hpp
xpti_registry.hpp
sycl::_V1::get
pointer_t get() const
Definition: multi_ptr.hpp:974
sycl::_V1::malloc_device
void * malloc_device(size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:301
_pi_result
_pi_result
Definition: pi.h:205
sycl::_V1::context::get_devices
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:139
sycl::_V1::aligned_alloc_device
void * aligned_alloc_device(size_t alignment, size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:326
context.hpp
usm_impl.hpp
os_util.hpp
sycl::_V1::property_list::get_property
PropT get_property() const
Definition: property_list.hpp:47
sycl::_V1::detail::usm::alignedAllocInternal
void * alignedAllocInternal(size_t Alignment, size_t Size, const context_impl *CtxImpl, const device_impl *DevImpl, alloc Kind, const property_list &PropList)
Definition: usm_impl.cpp:130
sycl
Definition: access.hpp:18
sycl::_V1::ext::oneapi::experimental::release_from_device_copy
void release_from_device_copy(const void *Ptr, const context &Context)
Definition: usm_impl.cpp:678
usm.hpp
sycl::_V1::detail::usm::alignedAllocHost
void * alignedAllocHost(size_t Alignment, size_t Bytes, const context &Ctxt, sycl::usm::alloc Kind, const code_location &CL)
sycl::_V1::detail::context_impl::getHandleRef
sycl::detail::pi::PiContext & getHandleRef()
Gets the underlying context object (if any) without reference count modification.
Definition: context_impl.cpp:235
queue_impl.hpp
sycl::_V1::detail::device_impl::has_extension
bool has_extension(const std::string &ExtensionName) const
Check SYCL extension support by device.
Definition: device_impl.cpp:146
pi.hpp
piextUSMSharedAlloc
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_cuda.cpp:837
PI_MEM_TYPE_DEVICE
@ PI_MEM_TYPE_DEVICE
Definition: pi.h:1881
piextUSMDeviceAlloc
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_cuda.cpp:828
sycl::_V1::detail::aligned_allocator
Definition: aligned_allocator.hpp:22
PI_MEM_ALLOC_DEVICE_READ_ONLY
constexpr pi_usm_mem_properties PI_MEM_ALLOC_DEVICE_READ_ONLY
Definition: pi.h:723
pi_usm_type
_pi_usm_type pi_usm_type
Definition: pi.h:1895
sycl::_V1::ext::oneapi::experimental::prepare_for_device_copy
void prepare_for_device_copy(const void *Ptr, size_t Size, const context &Context)
Definition: usm_impl.cpp:669
sycl::_V1::queue
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:119
sycl::_V1::prepare_for_usm_device_copy
static void prepare_for_usm_device_copy(const void *Ptr, size_t Size, const context &Ctxt)
Definition: usm_impl.cpp:651
sycl::_V1::detail::tls_code_loc_t
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
PI_MEM_TYPE_SHARED
@ PI_MEM_TYPE_SHARED
Definition: pi.h:1882
sycl::_V1::ext::oneapi::experimental::assert
assert(false)
sycl::_V1::ext::oneapi::experimental::detail::Alignment
@ Alignment
Definition: property.hpp:193
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:59
sycl::_V1::aligned_alloc
void * aligned_alloc(size_t alignment, size_t size, const device &dev, const context &ctxt, usm::alloc kind, const detail::code_location &CodeLoc=detail::code_location::current())
piextUSMHostAlloc
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_cuda.cpp:856
sycl::_V1::detail::device_impl
Definition: device_impl.hpp:35
sycl::_V1::detail::usm::freeInternal
void freeInternal(void *Ptr, const context_impl *CtxImpl)
Definition: usm_impl.cpp:265
piextUSMGetMemAllocInfo
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_cuda.cpp:945
sycl::_V1::detail::usm::alignedAlloc
void * alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt, const device &Dev, sycl::usm::alloc Kind, const code_location &CL)
aligned_allocator.hpp
sycl::_V1::detail::aligned_allocator::allocate
pointer allocate(size_t Size)
Definition: aligned_allocator.hpp:52
PI_MEM_ALLOC_FLAGS
constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS
Definition: pi.h:717
sycl::_V1::property_list::has_property
bool has_property() const noexcept
Definition: property_list.hpp:55
sycl::_V1::detail::context_impl::is_host
bool is_host() const
Checks if this context is a host context.
Definition: context_impl.cpp:123
sycl::_V1::detail::device_impl::getHandleRef
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
Definition: device_impl.hpp:66
sycl::_V1::queue::get_context
context get_context() const
Definition: queue.cpp:75
sycl::_V1::release_from_usm_device_copy
static void release_from_usm_device_copy(const void *Ptr, const context &Ctxt)
Definition: usm_impl.cpp:660
sycl::_V1::detail::code_location
Definition: common.hpp:66
sycl::_V1::ext::intel::experimental::buffer_location
constexpr buffer_location_key::value_t< N > buffer_location
Definition: fpga_annotated_properties.hpp:94
sycl::_V1::device::get_info
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.cpp:136
sycl::_V1::context::get_platform
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:135
sycl::_V1::aligned_alloc_shared
void * aligned_alloc_shared(size_t alignment, size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:440
PI_MEM_ALLOC_DEVICE
@ PI_MEM_ALLOC_DEVICE
Definition: pi.h:1875
sycl::_V1::alloc
sycl::usm::alloc alloc
Definition: usm_impl.cpp:34
piextUSMRelease
pi_result piextUSMRelease(const void *ptr, pi_context context)
Release host system memory from USM.
Definition: pi_cuda.cpp:957
sycl::_V1::detail::usm::free
void free(void *Ptr, const context &Ctxt, const code_location &CL)
Definition: usm_impl.cpp:278
sycl::_V1::ext::oneapi::property::usm::device_read_only
Definition: usm_properties.hpp:22
sycl::_V1::free
void free(void *ptr, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:355
usm_properties.hpp
piextUSMImport
pi_result piextUSMImport(const void *ptr, size_t size, pi_context context)
Import host system memory into USM.
Definition: pi_cuda.cpp:953
sycl::_V1::malloc_shared
void * malloc_shared(size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:389
sycl::_V1::malloc_host
void * malloc_host(size_t size, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:364
PI_MEM_TYPE_HOST
@ PI_MEM_TYPE_HOST
Definition: pi.h:1880
sycl::_V1::detail::OSUtil::alignedFree
static void alignedFree(void *Ptr)
Deallocates the memory referenced by Ptr.
Definition: os_util.cpp:228
PI_MEM_USM_ALLOC_BUFFER_LOCATION
constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION
Definition: pi.h:725
sycl::_V1::queue::get_device
device get_device() const
Definition: queue.cpp:77
sycl::_V1::malloc
void * malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind, const detail::code_location &CodeLoc=detail::code_location::current())
sycl::_V1::detail::context_impl::getPlugin
const PluginPtr & getPlugin() const
Definition: context_impl.hpp:111
PI_MEM_ALLOC_TYPE
@ PI_MEM_ALLOC_TYPE
Definition: pi.h:1872
sycl::_V1::detail::context_impl
Definition: context_impl.hpp:33
sycl::_V1::aligned_alloc_host
void * aligned_alloc_host(size_t alignment, size_t size, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:414
pi_device
_pi_device * pi_device
Definition: pi.h:1085
_pi_context
Definition: pi_cuda.hpp:52
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
_pi_device
Definition: pi_cuda.hpp:48
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
sycl::_V1::detail::PluginPtr
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48