DPC++ Runtime
Runtime libraries for oneAPI DPC++
usm_impl.cpp
Go to the documentation of this file.
1 //==---------------- usm_impl.cpp - USM API Utils -------------*- C++ -*---==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // ===--------------------------------------------------------------------=== //
8 
9 #include <detail/queue_impl.hpp>
10 #include <detail/usm/usm_impl.hpp>
11 #include <sycl/context.hpp>
13 #include <sycl/detail/os_util.hpp>
14 #include <sycl/detail/pi.hpp>
15 #include <sycl/device.hpp>
18 #include <sycl/usm.hpp>
19 
20 #include <array>
21 #include <cassert>
22 #include <cstdlib>
23 #include <memory>
24 
25 #ifdef XPTI_ENABLE_INSTRUMENTATION
26 // Include the headers necessary for emitting
27 // traces using the trace framework
28 #include "xpti/xpti_trace_framework.hpp"
29 #include <detail/xpti_registry.hpp>
30 #endif
31 
32 namespace sycl {
33 inline namespace _V1 {
34 
36 
37 namespace detail {
38 #ifdef XPTI_ENABLE_INSTRUMENTATION
39 extern xpti::trace_event_data_t *GSYCLGraphEvent;
40 #endif
41 namespace usm {
42 
/// Allocates host USM of \p Size bytes in context \p Ctxt.
///
/// \param Alignment requested alignment; on a host context a value of 0 is
///        replaced by 128 before allocating.
/// \param Size      number of bytes requested; 0 yields nullptr.
/// \param Ctxt      context the allocation belongs to.
/// \param Kind      only alloc::host reaches the plugin on a non-host context;
///        device, shared and unknown yield nullptr there.
/// \param PropList  properties; a buffer_location property is forwarded to
///        the plugin when the platform reports the
///        "cl_intel_mem_alloc_buffer_location" extension.
/// \param CodeLoc   caller location, used only when
///        XPTI_ENABLE_INSTRUMENTATION is defined.
/// \return the allocated pointer, or nullptr when Size is 0 or the
///         allocation fails (failures are not reported as exceptions).
/// \throws sycl::exception with errc::feature_not_supported when no device in
///         \p Ctxt has aspect::usm_host_allocations.
43 void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt,
44  alloc Kind, const property_list &PropList,
45  const detail::code_location &CodeLoc) {
46 #ifdef XPTI_ENABLE_INSTRUMENTATION
47  // Stash the code location information and propagate
48  detail::tls_code_loc_t CL(CodeLoc);
49  XPTIScope PrepareNotify((void *)alignedAllocHost,
50  (uint16_t)xpti::trace_point_type_t::node_create,
51  SYCL_MEM_ALLOC_STREAM_NAME, "malloc_host");
52  PrepareNotify.addMetadata([&](auto TEvent) {
53  xpti::addMetadata(TEvent, "sycl_device_name", std::string("Host"));
54  xpti::addMetadata(TEvent, "sycl_device", 0);
55  xpti::addMetadata(TEvent, "memory_size", Size);
56  });
57  // Notify XPTI about the allocation request
58  PrepareNotify.notify();
59  // Emit a begin/end scope for this call
60  PrepareNotify.scopedNotify(
61  (uint16_t)xpti::trace_point_type_t::mem_alloc_begin);
62 #endif
// The aspect check runs before the Size == 0 shortcut, so an unsupported
// context throws even for zero-sized requests.
63  const auto &devices = Ctxt.get_devices();
64  if (!std::any_of(devices.begin(), devices.end(), [&](const auto &device) {
65  return device.has(sycl::aspect::usm_host_allocations);
66  })) {
67  throw sycl::exception(
68  sycl::errc::feature_not_supported,
69  "No device in this context supports USM host allocations!");
70  }
71  void *RetVal = nullptr;
72  if (Size == 0)
73  return nullptr;
74 
75  std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
// Host context: Kind is not consulted on this path.
76  if (CtxImpl->is_host()) {
77  if (!Alignment) {
78  // worst case default
79  Alignment = 128;
80  }
81 
// NOTE(review): listing line 82 is absent from this excerpt; `Alloc` is used
// below but its declaration is not visible here - confirm against the
// upstream file before relying on this block.
83  try {
84  RetVal = Alloc.allocate(Size);
85  } catch (const std::bad_alloc &) {
86  // Conform with Specification behavior
87  RetVal = nullptr;
88  }
89  } else {
90  pi_context C = CtxImpl->getHandleRef();
91  const PluginPtr &Plugin = CtxImpl->getPlugin();
92  pi_result Error = PI_ERROR_INVALID_VALUE;
93 
94  switch (Kind) {
95  case alloc::host: {
// Room for one key/value pair plus the terminating 0.
96  std::array<pi_usm_mem_properties, 3> Props;
97  auto PropsIter = Props.begin();
98 
// NOTE(review): listing lines 105-106 are absent, so the expression between
// `PropList` and `.get_buffer_location()` below is incomplete in this excerpt.
99  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
100  buffer_location>() &&
101  Ctxt.get_platform().has_extension(
102  "cl_intel_mem_alloc_buffer_location")) {
103  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
104  *PropsIter++ = PropList
107  .get_buffer_location();
108  }
109 
// At most two entries were written above, so one slot remains for the
// terminator.
110  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
111  *PropsIter++ = 0; // null-terminate property list
112 
113  Error = Plugin->call_nocheck<PiApiKind::piextUSMHostAlloc>(
114  &RetVal, C, Props.data(), Size, Alignment);
115 
116  break;
117  }
// Non-host kinds are not valid for this entry point; they fall through to the
// nullptr return below.
118  case alloc::device:
119  case alloc::shared:
120  case alloc::unknown: {
121  RetVal = nullptr;
122  Error = PI_ERROR_INVALID_VALUE;
123  break;
124  }
125  }
126 
127  // Error is for debugging purposes.
128  // The spec wants a nullptr returned, not an exception.
// This early return also skips the "memory_ptr" trace metadata below.
129  if (Error != PI_SUCCESS)
130  return nullptr;
131  }
132 #ifdef XPTI_ENABLE_INSTRUMENTATION
133  xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr",
134  reinterpret_cast<size_t>(RetVal));
135 #endif
136  return RetVal;
137 }
138 
/// Allocates device or shared USM; this function emits no tracing itself.
///
/// \param Alignment requested alignment; on a host context a value of 0 is
///        replaced by 128 before allocating.
/// \param Size      number of bytes requested; 0 yields nullptr.
/// \param CtxImpl   context implementation the allocation belongs to.
/// \param DevImpl   device implementation; dereferenced when Kind is
///        alloc::device or alloc::shared.
/// \param Kind      allocation kind. On a non-host context alloc::host and
///        alloc::unknown yield nullptr; on a host context only alloc::unknown
///        does.
/// \param PropList  properties translated into the plugin property array.
/// \return the allocated pointer, or nullptr when Size is 0, Kind is not
///         handled, or the allocation fails.
/// \throws sycl::exception with errc::feature_not_supported when Kind is
///         alloc::device / alloc::shared and \p DevImpl lacks the matching
///         aspect.
139 void *alignedAllocInternal(size_t Alignment, size_t Size,
140  const context_impl *CtxImpl,
141  const device_impl *DevImpl, alloc Kind,
142  const property_list &PropList) {
// Aspect checks run before the Size == 0 shortcut.
143  if (Kind == alloc::device &&
144  !DevImpl->has(sycl::aspect::usm_device_allocations)) {
145  throw sycl::exception(sycl::errc::feature_not_supported,
146  "Device does not support USM device allocations!");
147  }
148  if (Kind == alloc::shared &&
149  !DevImpl->has(sycl::aspect::usm_shared_allocations)) {
150  throw sycl::exception(sycl::errc::feature_not_supported,
151  "Device does not support shared USM allocations!");
152  }
153  void *RetVal = nullptr;
154  if (Size == 0)
155  return nullptr;
156 
157  if (CtxImpl->is_host()) {
158  if (Kind == alloc::unknown) {
159  RetVal = nullptr;
160  } else {
161  if (!Alignment) {
162  // worst case default
163  Alignment = 128;
164  }
165 
// NOTE(review): listing line 166 is absent from this excerpt; `Alloc` is used
// below but its declaration is not visible here - confirm against upstream.
167  try {
168  RetVal = Alloc.allocate(Size);
169  } catch (const std::bad_alloc &) {
170  // Conform with Specification behavior
171  RetVal = nullptr;
172  }
173  }
174  } else {
175  pi_context C = CtxImpl->getHandleRef();
176  const PluginPtr &Plugin = CtxImpl->getPlugin();
177  pi_result Error = PI_ERROR_INVALID_VALUE;
178  pi_device Id;
179 
180  switch (Kind) {
181  case alloc::device: {
182  Id = DevImpl->getHandleRef();
183 
// Room for one key/value pair plus the terminating 0.
184  std::array<pi_usm_mem_properties, 3> Props;
185  auto PropsIter = Props.begin();
186 
187  // Buffer location is only supported on FPGA devices
// NOTE(review): listing lines 193-194 are absent, so the expression between
// `PropList` and `.get_buffer_location()` below is incomplete in this excerpt.
188  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
189  buffer_location>() &&
190  DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) {
191  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
192  *PropsIter++ = PropList
195  .get_buffer_location();
196  }
197 
198  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
199  *PropsIter++ = 0; // null-terminate property list
200 
201  Error = Plugin->call_nocheck<PiApiKind::piextUSMDeviceAlloc>(
202  &RetVal, C, Id, Props.data(), Size, Alignment);
203 
204  break;
205  }
206  case alloc::shared: {
207  Id = DevImpl->getHandleRef();
208 
// Room for two key/value pairs (allocation flags and buffer location) plus
// the terminating 0.
209  std::array<pi_usm_mem_properties, 5> Props;
210  auto PropsIter = Props.begin();
211 
// NOTE(review): listing line 213 is absent, so the property type tested by
// this has_property<> call is not visible in this excerpt.
212  if (PropList.has_property<
214  *PropsIter++ = PI_MEM_ALLOC_FLAGS;
215  *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY;
216  }
217 
// NOTE(review): listing lines 223-224 are absent; same gap as in the
// alloc::device case above.
218  if (PropList.has_property<sycl::ext::intel::experimental::property::usm::
219  buffer_location>() &&
220  DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) {
221  *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION;
222  *PropsIter++ = PropList
225  .get_buffer_location();
226  }
227 
228  assert(PropsIter >= Props.begin() && PropsIter < Props.end());
229  *PropsIter++ = 0; // null-terminate property list
230 
231  Error = Plugin->call_nocheck<PiApiKind::piextUSMSharedAlloc>(
232  &RetVal, C, Id, Props.data(), Size, Alignment);
233 
234  break;
235  }
// Host and unknown kinds are not served here; they end in the nullptr return
// below.
236  case alloc::host:
237  case alloc::unknown: {
238  RetVal = nullptr;
239  Error = PI_ERROR_INVALID_VALUE;
240  break;
241  }
242  }
243 
244  // Error is for debugging purposes.
245  // The spec wants a nullptr returned, not an exception.
246  if (Error != PI_SUCCESS)
247  return nullptr;
248  }
249  return RetVal;
250 }
251 
252 void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
253  const device &Dev, alloc Kind, const property_list &PropList,
254  const detail::code_location &CodeLoc) {
255 #ifdef XPTI_ENABLE_INSTRUMENTATION
256  // Stash the code location information and propagate
257  detail::tls_code_loc_t CL(CodeLoc);
258  XPTIScope PrepareNotify((void *)alignedAlloc,
259  (uint16_t)xpti::trace_point_type_t::node_create,
260  SYCL_MEM_ALLOC_STREAM_NAME, "usm::alignedAlloc");
261  PrepareNotify.addMetadata([&](auto TEvent) {
262  xpti::addMetadata(TEvent, "sycl_device_name",
263  Dev.get_info<info::device::name>());
264  // Need to determine how to get the device handle reference
265  // xpti::addMetadata(TEvent, "sycl_device", Dev.getHandleRef()));
266  xpti::addMetadata(TEvent, "memory_size", Size);
267  });
268  // Notify XPTI about the memset submission
269  PrepareNotify.notify();
270  // Emit a begin/end scope for this call
271  PrepareNotify.scopedNotify(
272  (uint16_t)xpti::trace_point_type_t::mem_alloc_begin);
273 #endif
274  void *RetVal =
276  getSyclObjImpl(Dev).get(), Kind, PropList);
277 #ifdef XPTI_ENABLE_INSTRUMENTATION
278  xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr",
279  reinterpret_cast<size_t>(RetVal));
280 #endif
281  return RetVal;
282 }
283 
284 void freeInternal(void *Ptr, const context_impl *CtxImpl) {
285  if (Ptr == nullptr)
286  return;
287  if (CtxImpl->is_host()) {
288  // need to use alignedFree here for Windows
290  } else {
291  pi_context C = CtxImpl->getHandleRef();
292  const PluginPtr &Plugin = CtxImpl->getPlugin();
293  Plugin->call<PiApiKind::piextUSMFree>(C, Ptr);
294  }
295 }
296 
297 void free(void *Ptr, const context &Ctxt,
298  const detail::code_location &CodeLoc) {
299 #ifdef XPTI_ENABLE_INSTRUMENTATION
300  // Stash the code location information and propagate
301  detail::tls_code_loc_t CL(CodeLoc);
302  XPTIScope PrepareNotify((void *)free,
303  (uint16_t)xpti::trace_point_type_t::node_create,
304  SYCL_MEM_ALLOC_STREAM_NAME, "usm::free");
305  PrepareNotify.addMetadata([&](auto TEvent) {
306  xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
307  });
308  // Notify XPTI about the memset submission
309  PrepareNotify.notify();
310  // Emit a begin/end scope for this call
311  PrepareNotify.scopedNotify(
312  (uint16_t)xpti::trace_point_type_t::mem_release_begin);
313 #endif
315 }
316 
317 } // namespace usm
318 } // namespace detail
319 
320 void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
321  const detail::code_location &CodeLoc) {
322  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device,
323  property_list{}, CodeLoc);
324 }
325 
326 void *malloc_device(size_t Size, const device &Dev, const context &Ctxt,
327  const property_list &PropList,
328  const detail::code_location &CodeLoc) {
329  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, PropList,
330  CodeLoc);
331 }
332 
333 void *malloc_device(size_t Size, const queue &Q,
334  const detail::code_location &CodeLoc) {
335  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
336  alloc::device, property_list{}, CodeLoc);
337 }
338 
339 void *malloc_device(size_t Size, const queue &Q, const property_list &PropList,
340  const detail::code_location &CodeLoc) {
341  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
342  alloc::device, PropList, CodeLoc);
343 }
344 
345 void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev,
346  const context &Ctxt,
347  const detail::code_location &CodeLoc) {
348  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device,
349  property_list{}, CodeLoc);
350 }
351 
352 void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev,
353  const context &Ctxt, const property_list &PropList,
354  const detail::code_location &CodeLoc) {
355  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device,
356  PropList, CodeLoc);
357 }
358 
359 void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q,
360  const detail::code_location &CodeLoc) {
362  Q.get_device(), alloc::device,
363  property_list{}, CodeLoc);
364 }
365 
366 void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q,
367  const property_list &PropList,
368  const detail::code_location &CodeLoc) {
369  return detail::usm::alignedAlloc(Alignment, Size, Q.get_context(),
370  Q.get_device(), alloc::device, PropList,
371  CodeLoc);
372 }
373 
374 void free(void *ptr, const context &Ctxt,
375  const detail::code_location &CodeLoc) {
376  return detail::usm::free(ptr, Ctxt, CodeLoc);
377 }
378 
379 void free(void *ptr, const queue &Q, const detail::code_location &CodeLoc) {
380  return detail::usm::free(ptr, Q.get_context(), CodeLoc);
381 }
382 
383 void *malloc_host(size_t Size, const context &Ctxt,
384  const detail::code_location &CodeLoc) {
385  return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host,
386  property_list{}, CodeLoc);
387 }
388 
389 void *malloc_host(size_t Size, const context &Ctxt,
390  const property_list &PropList,
391  const detail::code_location &CodeLoc) {
392  return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host, PropList,
393  CodeLoc);
394 }
395 
396 void *malloc_host(size_t Size, const queue &Q,
397  const detail::code_location &CodeLoc) {
398  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), alloc::host,
399  property_list{}, CodeLoc);
400 }
401 
402 void *malloc_host(size_t Size, const queue &Q, const property_list &PropList,
403  const detail::code_location &CodeLoc) {
404  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), alloc::host,
405  PropList, CodeLoc);
406 }
407 
408 void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt,
409  const detail::code_location &CodeLoc) {
410  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared,
411  property_list{}, CodeLoc);
412 }
413 
414 void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt,
415  const property_list &PropList,
416  const detail::code_location &CodeLoc) {
417  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared, PropList,
418  CodeLoc);
419 }
420 
421 void *malloc_shared(size_t Size, const queue &Q,
422  const detail::code_location &CodeLoc) {
423  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
424  alloc::shared, property_list{}, CodeLoc);
425 }
426 
427 void *malloc_shared(size_t Size, const queue &Q, const property_list &PropList,
428  const detail::code_location &CodeLoc) {
429  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
430  alloc::shared, PropList, CodeLoc);
431 }
432 
433 void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt,
434  const detail::code_location &CodeLoc) {
435  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host,
436  property_list{}, CodeLoc);
437 }
438 
439 void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt,
440  const property_list &PropList,
441  const detail::code_location &CodeLoc) {
442  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host,
443  PropList, CodeLoc);
444 }
445 
446 void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q,
447  const detail::code_location &CodeLoc) {
449  alloc::host, property_list{}, CodeLoc);
450 }
451 
452 void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q,
453  const property_list &PropList,
454  const detail::code_location &CodeLoc) {
455  return detail::usm::alignedAllocHost(Alignment, Size, Q.get_context(),
456  alloc::host, PropList, CodeLoc);
457 }
458 
459 void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev,
460  const context &Ctxt,
461  const detail::code_location &CodeLoc) {
462  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared,
463  property_list{}, CodeLoc);
464 }
465 
466 void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev,
467  const context &Ctxt, const property_list &PropList,
468  const detail::code_location &CodeLoc) {
469  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared,
470  PropList, CodeLoc);
471 }
472 
473 void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q,
474  const detail::code_location &CodeLoc) {
476  Q.get_device(), alloc::shared,
477  property_list{}, CodeLoc);
478 }
479 
480 void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q,
481  const property_list &PropList,
482  const detail::code_location &CodeLoc) {
483  return detail::usm::alignedAlloc(Alignment, Size, Q.get_context(),
484  Q.get_device(), alloc::shared, PropList,
485  CodeLoc);
486 }
487 
488 // single form
489 
490 void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind,
491  const property_list &PropList,
492  const detail::code_location &CodeLoc) {
493  if (Kind == alloc::host)
494  return detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, PropList,
495  CodeLoc);
496  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, PropList, CodeLoc);
497 }
498 
499 void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind,
500  const detail::code_location &CodeLoc) {
501  if (Kind == alloc::host)
502  return detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, property_list{},
503  CodeLoc);
504  return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, property_list{},
505  CodeLoc);
506 }
507 
508 void *malloc(size_t Size, const queue &Q, alloc Kind,
509  const detail::code_location &CodeLoc) {
510  if (Kind == alloc::host)
511  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), Kind,
512  property_list{}, CodeLoc);
513  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
514  Kind, property_list{}, CodeLoc);
515 }
516 
517 void *malloc(size_t Size, const queue &Q, alloc Kind,
518  const property_list &PropList,
519  const detail::code_location &CodeLoc) {
520  if (Kind == alloc::host)
521  return detail::usm::alignedAllocHost(0, Size, Q.get_context(), Kind,
522  PropList, CodeLoc);
523  return detail::usm::alignedAlloc(0, Size, Q.get_context(), Q.get_device(),
524  Kind, PropList, CodeLoc);
525 }
526 
527 void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev,
528  const context &Ctxt, alloc Kind,
529  const detail::code_location &CodeLoc) {
530  if (Kind == alloc::host)
531  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind,
532  property_list{}, CodeLoc);
533 
534  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind,
535  property_list{}, CodeLoc);
536 }
537 
538 void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev,
539  const context &Ctxt, alloc Kind,
540  const property_list &PropList,
541  const detail::code_location &CodeLoc) {
542  if (Kind == alloc::host)
543  return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind, PropList,
544  CodeLoc);
545  return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind, PropList,
546  CodeLoc);
547 }
548 
549 void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind,
550  const detail::code_location &CodeLoc) {
551  if (Kind == alloc::host)
552  return detail::usm::alignedAllocHost(Alignment, Size, Q.get_context(), Kind,
553  property_list{}, CodeLoc);
555  Q.get_device(), Kind, property_list{},
556  CodeLoc);
557 }
558 
559 void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind,
560  const property_list &PropList,
561  const detail::code_location &CodeLoc) {
562  if (Kind == alloc::host)
563  return detail::usm::alignedAllocHost(Alignment, Size, Q.get_context(), Kind,
564  PropList, CodeLoc);
566  Q.get_device(), Kind, PropList, CodeLoc);
567 }
568 
569 // Pointer queries
575 alloc get_pointer_type(const void *Ptr, const context &Ctxt) {
576  if (!Ptr)
577  return alloc::unknown;
578 
579  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
580 
581  // Everything on a host device is just system malloc so call it host
582  if (CtxImpl->is_host())
583  return alloc::host;
584 
585  pi_context PICtx = CtxImpl->getHandleRef();
586  pi_usm_type AllocTy;
587 
588  // query type using PI function
589  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
591  Plugin->call_nocheck<detail::PiApiKind::piextUSMGetMemAllocInfo>(
592  PICtx, Ptr, PI_MEM_ALLOC_TYPE, sizeof(pi_usm_type), &AllocTy,
593  nullptr);
594 
595  // PI_ERROR_INVALID_VALUE means USM doesn't know about this ptr
596  if (Err == PI_ERROR_INVALID_VALUE)
597  return alloc::unknown;
598  // otherwise PI_SUCCESS is expected
599  if (Err != PI_SUCCESS) {
600  Plugin->reportPiError(Err, "get_pointer_type()");
601  }
602 
603  alloc ResultAlloc;
604  switch (AllocTy) {
605  case PI_MEM_TYPE_HOST:
606  ResultAlloc = alloc::host;
607  break;
608  case PI_MEM_TYPE_DEVICE:
609  ResultAlloc = alloc::device;
610  break;
611  case PI_MEM_TYPE_SHARED:
612  ResultAlloc = alloc::shared;
613  break;
614  default:
615  ResultAlloc = alloc::unknown;
616  break;
617  }
618 
619  return ResultAlloc;
620 }
621 
626 device get_pointer_device(const void *Ptr, const context &Ctxt) {
627  // Check if ptr is a valid USM pointer
628  if (get_pointer_type(Ptr, Ctxt) == alloc::unknown)
629  throw runtime_error("Ptr not a valid USM allocation!",
630  PI_ERROR_INVALID_VALUE);
631 
632  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
633 
634  // Just return the host device in the host context
635  if (CtxImpl->is_host())
636  return Ctxt.get_devices()[0];
637 
638  // Check if ptr is a host allocation
639  if (get_pointer_type(Ptr, Ctxt) == alloc::host) {
640  auto Devs = CtxImpl->getDevices();
641  if (Devs.size() == 0)
642  throw runtime_error("No devices in passed context!",
643  PI_ERROR_INVALID_VALUE);
644 
645  // Just return the first device in the context
646  return Devs[0];
647  }
648 
649  pi_context PICtx = CtxImpl->getHandleRef();
650  pi_device DeviceId;
651 
652  // query device using PI function
653  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
655  PICtx, Ptr, PI_MEM_ALLOC_DEVICE, sizeof(pi_device), &DeviceId, nullptr);
656 
657  // The device is not necessarily a member of the context, it could be a
658  // member's descendant instead. Fetch the corresponding device from the cache.
659  std::shared_ptr<detail::platform_impl> PltImpl = CtxImpl->getPlatformImpl();
660  std::shared_ptr<detail::device_impl> DevImpl =
661  PltImpl->getDeviceImpl(DeviceId);
662  if (DevImpl)
663  return detail::createSyclObjFromImpl<device>(DevImpl);
664  throw runtime_error("Cannot find device associated with USM allocation!",
665  PI_ERROR_INVALID_OPERATION);
666 }
667 
668 // Device copy enhancement APIs, prepare_for and release_from USM.
669 
670 static void prepare_for_usm_device_copy(const void *Ptr, size_t Size,
671  const context &Ctxt) {
672  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
673  pi_context PICtx = CtxImpl->getHandleRef();
674  // Call the PI function
675  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
676  Plugin->call<detail::PiApiKind::piextUSMImport>(Ptr, Size, PICtx);
677 }
678 
679 static void release_from_usm_device_copy(const void *Ptr, const context &Ctxt) {
680  std::shared_ptr<detail::context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
681  pi_context PICtx = CtxImpl->getHandleRef();
682  // Call the PI function
683  const detail::PluginPtr &Plugin = CtxImpl->getPlugin();
684  Plugin->call<detail::PiApiKind::piextUSMRelease>(Ptr, PICtx);
685 }
686 
687 namespace ext::oneapi::experimental {
688 void prepare_for_device_copy(const void *Ptr, size_t Size,
689  const context &Ctxt) {
690  prepare_for_usm_device_copy(Ptr, Size, Ctxt);
691 }
692 
693 void prepare_for_device_copy(const void *Ptr, size_t Size, const queue &Queue) {
694  prepare_for_usm_device_copy(Ptr, Size, Queue.get_context());
695 }
696 
697 void release_from_device_copy(const void *Ptr, const context &Ctxt) {
698  release_from_usm_device_copy(Ptr, Ctxt);
699 }
700 
701 void release_from_device_copy(const void *Ptr, const queue &Queue) {
703 }
704 } // namespace ext::oneapi::experimental
705 
706 } // namespace _V1
707 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:152
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:148
static void alignedFree(void *Ptr)
Deallocates the memory referenced by Ptr.
Definition: os_util.cpp:233
const PluginPtr & getPlugin() const
bool is_host() const
Checks if this context is a host context.
sycl::detail::pi::PiContext & getHandleRef()
Gets the underlying context object (if any) without reference count modification.
bool has(aspect Aspect) const
Indicates if the SYCL device has the given feature.
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
Definition: device_impl.hpp:66
bool has_extension(const std::string &ExtensionName) const
Check SYCL extension support by device.
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:223
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
device get_device() const
Definition: queue.cpp:76
context get_context() const
Definition: queue.cpp:74
void * alignedAllocHost(size_t Alignment, size_t Bytes, const context &Ctxt, sycl::usm::alloc Kind, const code_location &CL)
void freeInternal(void *Ptr, const context_impl *CtxImpl)
Definition: usm_impl.cpp:284
void * alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt, const device &Dev, sycl::usm::alloc Kind, const code_location &CL)
void free(void *Ptr, const context &Ctxt, const code_location &CL)
Definition: usm_impl.cpp:297
void * alignedAllocInternal(size_t Alignment, size_t Size, const context_impl *CtxImpl, const device_impl *DevImpl, alloc Kind, const property_list &PropList)
Definition: usm_impl.cpp:139
constexpr auto SYCL_MEM_ALLOC_STREAM_NAME
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
constexpr buffer_location_key::value_t< N > buffer_location
void prepare_for_device_copy(const void *Ptr, size_t Size, const context &Context)
Definition: usm_impl.cpp:688
void release_from_device_copy(const void *Ptr, const context &Context)
Definition: usm_impl.cpp:697
void * aligned_alloc(size_t alignment, size_t size, const device &dev, const context &ctxt, usm::alloc kind, const detail::code_location &CodeLoc=detail::code_location::current())
void * aligned_alloc_host(size_t alignment, size_t size, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:433
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:575
void * aligned_alloc_shared(size_t alignment, size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:459
void * malloc_shared(size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:408
static void release_from_usm_device_copy(const void *Ptr, const context &Ctxt)
Definition: usm_impl.cpp:679
pointer get() const
Definition: multi_ptr.hpp:544
device get_pointer_device(const void *ptr, const context &ctxt)
Queries the device against which the pointer was allocated Throws an invalid_object_error if ptr is a...
Definition: usm_impl.cpp:626
void * aligned_alloc_device(size_t alignment, size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:345
void * malloc_device(size_t size, const device &dev, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:320
void * malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind, const detail::code_location &CodeLoc=detail::code_location::current())
void * malloc_host(size_t size, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:383
static void prepare_for_usm_device_copy(const void *Ptr, size_t Size, const context &Ctxt)
Definition: usm_impl.cpp:670
sycl::usm::alloc alloc
Definition: usm_impl.cpp:35
void free(void *ptr, const context &ctxt, const detail::code_location &CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:374
Definition: access.hpp:18
_pi_result
Definition: pi.h:224
constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION
Definition: pi.h:789
pi_result piextUSMFree(pi_context context, void *ptr)
Indicates that the allocated USM memory is no longer needed on the runtime side.
Definition: pi_cuda.cpp:895
constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS
Definition: pi.h:781
pi_result piextUSMImport(const void *ptr, size_t size, pi_context context)
Import host system memory into USM.
Definition: pi_cuda.cpp:985
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_cuda.cpp:888
_pi_usm_type
Definition: pi.h:1965
@ PI_MEM_TYPE_SHARED
Definition: pi.h:1969
@ PI_MEM_TYPE_DEVICE
Definition: pi.h:1968
@ PI_MEM_TYPE_HOST
Definition: pi.h:1967
pi_result piextUSMRelease(const void *ptr, pi_context context)
Release host system memory from USM.
Definition: pi_cuda.cpp:989
_pi_device * pi_device
Definition: pi.h:1149
constexpr pi_usm_mem_properties PI_MEM_ALLOC_DEVICE_READ_ONLY
Definition: pi.h:787
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_cuda.cpp:860
@ PI_MEM_ALLOC_TYPE
Definition: pi.h:1959
@ PI_MEM_ALLOC_DEVICE
Definition: pi.h:1962
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_cuda.cpp:869
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_cuda.cpp:977
_pi_usm_type pi_usm_type
Definition: pi.h:1982
C++ wrapper of extern "C" PI interfaces.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept