DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_hip.cpp
Go to the documentation of this file.
1 //==---------- pi_hip.cpp - HIP Plugin ------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
13 
16 #include <CL/sycl/detail/pi.hpp>
17 #include <pi_hip.hpp>
18 
19 #include <algorithm>
20 #include <cassert>
21 #include <hip/hip_runtime.h>
22 #include <limits>
23 #include <memory>
24 #include <mutex>
25 #include <regex>
26 #include <string.h>
27 
28 namespace {
29 // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be
30 // indexed, but on NVidia it is an opaque type and needs to go through
31 // cuArrayGetDescriptor so implement a utility function to get the array
32 // properties
33 inline void getArrayDesc(hipArray *array, hipArray_Format &format,
34  size_t &channels) {
35 #if defined(__HIP_PLATFORM_AMD__)
36  format = array->Format;
37  channels = array->NumChannels;
38 #elif defined(__HIP_PLATFORM_NVIDIA__)
39  CUDA_ARRAY_DESCRIPTOR arrayDesc;
40  cuArrayGetDescriptor(&arrayDesc, (CUarray)array);
41 
42  format = arrayDesc.Format;
43  channels = arrayDesc.NumChannels;
44 #else
45 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
46 #endif
47 }
48 
49 // NVidia HIP headers guard hipArray3DCreate behind __CUDACC__, this does not
50 // seem to be required and we're not using nvcc to build the HIP PI plugin so
51 // add the translation function here
52 #if defined(__HIP_PLATFORM_NVIDIA__) && !defined(__CUDACC__)
53 inline static hipError_t
54 hipArray3DCreate(hiparray *pHandle,
55  const HIP_ARRAY3D_DESCRIPTOR *pAllocateArray) {
56  return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
57 }
58 #endif
59 
60 // hipArray gets turned into cudaArray when using the HIP NVIDIA platform, and
61 // some CUDA APIs use cudaArray* and others use CUarray, these two represent the
62 // same type, however when building cudaArray appears as an opaque type, so it
63 // needs to be explicitly casted to CUarray. In order for this to work for both
64 // AMD and NVidia we introduce an second hipArray type that will be CUarray for
65 // NVIDIA and hipArray* for AMD so that we can place the explicit casts when
66 // necessary for NVIDIA and they will be no-ops for AMD.
67 #if defined(__HIP_PLATFORM_NVIDIA__)
68 typedef CUarray hipCUarray;
69 #elif defined(__HIP_PLATFORM_AMD__)
70 typedef hipArray *hipCUarray;
71 #else
72 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
73 #endif
74 
75 // Add missing HIP to CUDA defines
76 #if defined(__HIP_PLATFORM_NVIDIA__)
77 #define hipMemoryType CUmemorytype
78 #define hipMemoryTypeHost CU_MEMORYTYPE_HOST
79 #define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE
80 #define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY
81 #define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED
82 #endif
83 
84 std::string getHipVersionString() {
85  int driver_version = 0;
86  if (hipDriverGetVersion(&driver_version) != hipSuccess) {
87  return "";
88  }
89  // The version is returned as (1000 major + 10 minor).
90  std::stringstream stream;
91  stream << "HIP " << driver_version / 1000 << "."
92  << driver_version % 1000 / 10;
93  return stream.str();
94 }
95 
96 pi_result map_error(hipError_t result) {
97  switch (result) {
98  case hipSuccess:
99  return PI_SUCCESS;
100  case hipErrorInvalidContext:
101  return PI_ERROR_INVALID_CONTEXT;
102  case hipErrorInvalidDevice:
103  return PI_ERROR_INVALID_DEVICE;
104  case hipErrorInvalidValue:
105  return PI_ERROR_INVALID_VALUE;
106  case hipErrorOutOfMemory:
107  return PI_ERROR_OUT_OF_HOST_MEMORY;
108  case hipErrorLaunchOutOfResources:
109  return PI_ERROR_OUT_OF_RESOURCES;
110  default:
111  return PI_ERROR_UNKNOWN;
112  }
113 }
114 
115 // Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
116 constexpr size_t MaxMessageSize = 256;
117 thread_local pi_result ErrorMessageCode = PI_SUCCESS;
118 thread_local char ErrorMessage[MaxMessageSize];
119 
120 // Utility function for setting a message and warning
121 [[maybe_unused]] static void setErrorMessage(const char *message,
122  pi_result error_code) {
123  assert(strlen(message) <= MaxMessageSize);
124  strcpy(ErrorMessage, message);
125  ErrorMessageCode = error_code;
126 }
127 
128 // Returns plugin specific error and warning messages
129 pi_result hip_piPluginGetLastError(char **message) {
130  *message = &ErrorMessage[0];
131  return ErrorMessageCode;
132 }
133 
134 // Iterates over the event wait list, returns correct pi_result error codes.
135 // Invokes the callback for the latest event of each queue in the wait list.
136 // The callback must take a single pi_event argument and return a pi_result.
137 template <typename Func>
138 pi_result forLatestEvents(const pi_event *event_wait_list,
139  std::size_t num_events_in_wait_list, Func &&f) {
140 
141  if (event_wait_list == nullptr || num_events_in_wait_list == 0) {
142  return PI_ERROR_INVALID_EVENT_WAIT_LIST;
143  }
144 
145  // Fast path if we only have a single event
146  if (num_events_in_wait_list == 1) {
147  return f(event_wait_list[0]);
148  }
149 
150  std::vector<pi_event> events{event_wait_list,
151  event_wait_list + num_events_in_wait_list};
152  std::sort(events.begin(), events.end(), [](pi_event e0, pi_event e1) {
153  // Tiered sort creating sublists of streams (smallest value first) in which
154  // the corresponding events are sorted into a sequence of newest first.
155  return e0->get_queue()->stream_ < e1->get_queue()->stream_ ||
156  (e0->get_queue()->stream_ == e1->get_queue()->stream_ &&
157  e0->get_event_id() > e1->get_event_id());
158  });
159 
160  bool first = true;
161  hipStream_t lastSeenStream = 0;
162  for (pi_event event : events) {
163  if (!event || (!first && event->get_queue()->stream_ == lastSeenStream)) {
164  continue;
165  }
166 
167  first = false;
168  lastSeenStream = event->get_queue()->stream_;
169 
170  auto result = f(event);
171  if (result != PI_SUCCESS) {
172  return result;
173  }
174  }
175 
176  return PI_SUCCESS;
177 }
178 
186 pi_result check_error(hipError_t result, const char *function, int line,
187  const char *file) {
188  if (result == hipSuccess) {
189  return PI_SUCCESS;
190  }
191 
192  const char *errorString = nullptr;
193  const char *errorName = nullptr;
194  errorName = hipGetErrorName(result);
195  errorString = hipGetErrorString(result);
196  std::cerr << "\nPI HIP ERROR:"
197  << "\n\tValue: " << result
198  << "\n\tName: " << errorName
199  << "\n\tDescription: " << errorString
200  << "\n\tFunction: " << function
201  << "\n\tSource Location: " << file << ":" << line << "\n"
202  << std::endl;
203 
204  if (std::getenv("PI_HIP_ABORT") != nullptr) {
205  std::abort();
206  }
207 
208  throw map_error(result);
209 }
210 
212 #define PI_CHECK_ERROR(result) check_error(result, __func__, __LINE__, __FILE__)
213 
220 class ScopedContext {
221  pi_context placedContext_;
222  hipCtx_t original_;
223  bool needToRecover_;
224 
225 public:
226  ScopedContext(pi_context ctxt) : placedContext_{ctxt}, needToRecover_{false} {
227 
228  if (!placedContext_) {
229  throw PI_ERROR_INVALID_CONTEXT;
230  }
231 
232  hipCtx_t desired = placedContext_->get();
233  PI_CHECK_ERROR(hipCtxGetCurrent(&original_));
234  if (original_ != desired) {
235  // Sets the desired context as the active one for the thread
236  PI_CHECK_ERROR(hipCtxSetCurrent(desired));
237  if (original_ == nullptr) {
238  // No context is installed on the current thread
239  // This is the most common case. We can activate the context in the
240  // thread and leave it there until all the PI context referring to the
241  // same underlying HIP context are destroyed. This emulates
242  // the behaviour of the HIP runtime api, and avoids costly context
243  // switches. No action is required on this side of the if.
244  } else {
245  needToRecover_ = true;
246  }
247  }
248  }
249 
250  ~ScopedContext() {
251  if (needToRecover_) {
252  PI_CHECK_ERROR(hipCtxSetCurrent(original_));
253  }
254  }
255 };
256 
258 template <typename T, typename Assign>
259 pi_result getInfoImpl(size_t param_value_size, void *param_value,
260  size_t *param_value_size_ret, T value, size_t value_size,
261  Assign &&assign_func) {
262 
263  if (param_value != nullptr) {
264 
265  if (param_value_size < value_size) {
266  return PI_ERROR_INVALID_VALUE;
267  }
268 
269  assign_func(param_value, value, value_size);
270  }
271 
272  if (param_value_size_ret != nullptr) {
273  *param_value_size_ret = value_size;
274  }
275 
276  return PI_SUCCESS;
277 }
278 
279 template <typename T>
280 pi_result getInfo(size_t param_value_size, void *param_value,
281  size_t *param_value_size_ret, T value) {
282 
283  auto assignment = [](void *param_value, T value, size_t value_size) {
284  (void)value_size;
285  *static_cast<T *>(param_value) = value;
286  };
287 
288  return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
289  sizeof(T), std::move(assignment));
290 }
291 
292 template <typename T>
293 pi_result getInfoArray(size_t array_length, size_t param_value_size,
294  void *param_value, size_t *param_value_size_ret,
295  T *value) {
296 
297  auto assignment = [](void *param_value, T *value, size_t value_size) {
298  memcpy(param_value, static_cast<const void *>(value), value_size);
299  };
300 
301  return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
302  array_length * sizeof(T), std::move(assignment));
303 }
304 
305 template <>
306 pi_result getInfo<const char *>(size_t param_value_size, void *param_value,
307  size_t *param_value_size_ret,
308  const char *value) {
309  return getInfoArray(strlen(value) + 1, param_value_size, param_value,
310  param_value_size_ret, value);
311 }
312 
313 int getAttribute(pi_device device, hipDeviceAttribute_t attribute) {
314  int value;
316  hipDeviceGetAttribute(&value, attribute, device->get()) == hipSuccess);
317  return value;
318 }
320 
321 void simpleGuessLocalWorkSize(size_t *threadsPerBlock,
322  const size_t *global_work_size,
323  const size_t maxThreadsPerBlock[3],
324  pi_kernel kernel) {
325  assert(threadsPerBlock != nullptr);
326  assert(global_work_size != nullptr);
327  assert(kernel != nullptr);
328  // int recommendedBlockSize, minGrid;
329 
330  // PI_CHECK_ERROR(hipOccupancyMaxPotentialBlockSize(
331  // &minGrid, &recommendedBlockSize, kernel->get(),
332  // 0, 0));
333 
334  //(void)minGrid; // Not used, avoid warnings
335 
336  threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);
337 
338  // Find a local work group size that is a divisor of the global
339  // work group size to produce uniform work groups.
340  while (0u != (global_work_size[0] % threadsPerBlock[0])) {
341  --threadsPerBlock[0];
342  }
343 }
344 
345 } // anonymous namespace
346 
349 namespace sycl {
350 namespace detail {
351 namespace pi {
352 
353 // Report error and no return (keeps compiler from printing warnings).
354 // TODO: Probably change that to throw a catchable exception,
355 // but for now it is useful to see every failure.
356 //
357 [[noreturn]] void die(const char *Message) {
358  std::cerr << "pi_die: " << Message << std::endl;
359  std::terminate();
360 }
361 
362 // Reports error messages
363 void hipPrint(const char *Message) {
364  std::cerr << "pi_print: " << Message << std::endl;
365 }
366 
367 void assertion(bool Condition, const char *Message) {
368  if (!Condition)
369  die(Message);
370 }
371 
372 } // namespace pi
373 } // namespace detail
374 } // namespace sycl
375 } // __SYCL_INLINE_NAMESPACE(cl)
376 
377 //--------------
378 // PI object implementation
379 
380 extern "C" {
381 
382 // Required in a number of functions, so forward declare here
384  pi_uint32 num_events_in_wait_list,
385  const pi_event *event_wait_list,
386  pi_event *event);
388  pi_uint32 num_events_in_wait_list,
389  const pi_event *event_wait_list,
390  pi_event *event);
393 
394 } // extern "C"
395 
397 
399  : commandType_{type}, refCount_{1}, isCompleted_{false}, isRecorded_{false},
400  isStarted_{false}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr},
401  queue_{queue}, context_{context} {
402 
403  assert(type != PI_COMMAND_TYPE_USER);
404 
405  bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE;
406 
407  PI_CHECK_ERROR(hipEventCreateWithFlags(
408  &evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming));
409 
410  if (profilingEnabled) {
411  PI_CHECK_ERROR(hipEventCreateWithFlags(&evQueued_, hipEventDefault));
412  PI_CHECK_ERROR(hipEventCreateWithFlags(&evStart_, hipEventDefault));
413  }
414 
415  if (queue_ != nullptr) {
416  hip_piQueueRetain(queue_);
417  }
418  hip_piContextRetain(context_);
419 }
420 
422  if (queue_ != nullptr) {
423  hip_piQueueRelease(queue_);
424  }
425  hip_piContextRelease(context_);
426 }
427 
429  assert(!is_started());
430  pi_result result = PI_SUCCESS;
431 
432  try {
433  if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
434  // NOTE: This relies on the default stream to be unused.
435  PI_CHECK_ERROR(hipEventRecord(evQueued_, 0));
436  PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get()));
437  }
438  } catch (pi_result error) {
439  result = error;
440  }
441 
442  isStarted_ = true;
443  return result;
444 }
445 
446 bool _pi_event::is_completed() const noexcept {
447  if (!isRecorded_) {
448  return false;
449  }
450  if (!isCompleted_) {
451  const hipError_t ret = hipEventQuery(evEnd_);
452  if (ret != hipSuccess && ret != hipErrorNotReady) {
453  PI_CHECK_ERROR(ret);
454  return false;
455  }
456  if (ret == hipErrorNotReady) {
457  return false;
458  }
459  }
460  return true;
461 }
462 
464  float miliSeconds = 0.0f;
465  assert(is_started());
466 
467  PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, evStart_, evEnd_));
468  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
469 }
470 
472  float miliSeconds = 0.0f;
473  assert(is_started());
474 
475  PI_CHECK_ERROR(
476  hipEventElapsedTime(&miliSeconds, context_->evBase_, evStart_));
477  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
478 }
479 
481  float miliSeconds = 0.0f;
482  assert(is_started() && is_recorded());
483 
484  PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, context_->evBase_, evEnd_));
485  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
486 }
487 
489 
490  if (is_recorded() || !is_started()) {
491  return PI_ERROR_INVALID_EVENT;
492  }
493 
494  pi_result result = PI_ERROR_INVALID_OPERATION;
495 
496  if (!queue_) {
497  return PI_ERROR_INVALID_QUEUE;
498  }
499 
500  hipStream_t hipStream = queue_->get();
501 
502  try {
503  eventId_ = queue_->get_next_event_id();
504  if (eventId_ == 0) {
506  "Unrecoverable program state reached in event identifier overflow");
507  }
508  result = PI_CHECK_ERROR(hipEventRecord(evEnd_, hipStream));
509  } catch (pi_result error) {
510  result = error;
511  }
512 
513  if (result == PI_SUCCESS) {
514  isRecorded_ = true;
515  }
516 
517  return result;
518 }
519 
521  pi_result retErr;
522  try {
523  retErr = PI_CHECK_ERROR(hipEventSynchronize(evEnd_));
524  isCompleted_ = true;
525  } catch (pi_result error) {
526  retErr = error;
527  }
528 
529  return retErr;
530 }
531 
533  assert(queue_ != nullptr);
534  PI_CHECK_ERROR(hipEventDestroy(evEnd_));
535 
536  if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
537  PI_CHECK_ERROR(hipEventDestroy(evQueued_));
538  PI_CHECK_ERROR(hipEventDestroy(evStart_));
539  }
540 
541  return PI_SUCCESS;
542 }
543 
544 // makes all future work submitted to queue wait for all work captured in event.
546  // for native events, the hipStreamWaitEvent call is used.
547  // This makes all future work submitted to stream wait for all
548  // work captured in event.
549  return PI_CHECK_ERROR(hipStreamWaitEvent(queue->get(), event->get(), 0));
550 }
551 
553  : module_{nullptr}, binary_{},
554  binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} {
555  hip_piContextRetain(context_);
556 }
557 
559 
560 pi_result _pi_program::set_binary(const char *source, size_t length) {
561  assert((binary_ == nullptr && binarySizeInBytes_ == 0) &&
562  "Re-setting program binary data which has already been set");
563  binary_ = source;
565  return PI_SUCCESS;
566 }
567 
568 pi_result _pi_program::build_program(const char *build_options) {
569 
570  this->buildOptions_ = build_options;
571 
572  constexpr const unsigned int numberOfOptions = 4u;
573 
574  hipJitOption options[numberOfOptions];
575  void *optionVals[numberOfOptions];
576 
577  // Pass a buffer for info messages
578  options[0] = hipJitOptionInfoLogBuffer;
579  optionVals[0] = (void *)infoLog_;
580  // Pass the size of the info buffer
581  options[1] = hipJitOptionInfoLogBufferSizeBytes;
582  optionVals[1] = (void *)(long)MAX_LOG_SIZE;
583  // Pass a buffer for error message
584  options[2] = hipJitOptionErrorLogBuffer;
585  optionVals[2] = (void *)errorLog_;
586  // Pass the size of the error buffer
587  options[3] = hipJitOptionErrorLogBufferSizeBytes;
588  optionVals[3] = (void *)(long)MAX_LOG_SIZE;
589 
590  auto result = PI_CHECK_ERROR(
591  hipModuleLoadDataEx(&module_, static_cast<const void *>(binary_),
592  numberOfOptions, options, optionVals));
593 
594  const auto success = (result == PI_SUCCESS);
595 
596  buildStatus_ =
598 
599  // If no exception, result is correct
600  return success ? PI_SUCCESS : PI_ERROR_BUILD_PROGRAM_FAILURE;
601 }
602 
608 std::string getKernelNames(pi_program program) {
609  (void)program;
610  cl::sycl::detail::pi::die("getKernelNames not implemented");
611  return {};
612 }
613 
618 template <typename T> class ReleaseGuard {
619 private:
620  T Captive;
621 
622  static pi_result callRelease(pi_device Captive) {
623  return hip_piDeviceRelease(Captive);
624  }
625 
626  static pi_result callRelease(pi_context Captive) {
627  return hip_piContextRelease(Captive);
628  }
629 
630  static pi_result callRelease(pi_mem Captive) {
631  return hip_piMemRelease(Captive);
632  }
633 
634  static pi_result callRelease(pi_program Captive) {
635  return hip_piProgramRelease(Captive);
636  }
637 
638  static pi_result callRelease(pi_kernel Captive) {
639  return hip_piKernelRelease(Captive);
640  }
641 
642  static pi_result callRelease(pi_queue Captive) {
643  return hip_piQueueRelease(Captive);
644  }
645 
646  static pi_result callRelease(pi_event Captive) {
647  return hip_piEventRelease(Captive);
648  }
649 
650 public:
651  ReleaseGuard() = delete;
653  explicit ReleaseGuard(T Obj) : Captive(Obj) {}
654  ReleaseGuard(ReleaseGuard &&Other) noexcept : Captive(Other.Captive) {
655  Other.Captive = nullptr;
656  }
657 
658  ReleaseGuard(const ReleaseGuard &) = delete;
659 
663  if (Captive != nullptr) {
664  pi_result ret = callRelease(Captive);
665  if (ret != PI_SUCCESS) {
666  // A reported HIP error is either an implementation or an asynchronous
667  // HIP error for which it is unclear if the function that reported it
668  // succeeded or not. Either way, the state of the program is compromised
669  // and likely unrecoverable.
671  "Unrecoverable program state reached in hip_piMemRelease");
672  }
673  }
674  }
675 
676  ReleaseGuard &operator=(const ReleaseGuard &) = delete;
677 
679  Captive = Other.Captive;
680  Other.Captive = nullptr;
681  return *this;
682  }
683 
686  void dismiss() { Captive = nullptr; }
687 };
688 
689 //-- PI API implementation
690 extern "C" {
691 
701  pi_uint32 *num_platforms) {
702 
703  try {
704  static std::once_flag initFlag;
705  static pi_uint32 numPlatforms = 1;
706  static std::vector<_pi_platform> platformIds;
707 
708  if (num_entries == 0 and platforms != nullptr) {
709  return PI_ERROR_INVALID_VALUE;
710  }
711  if (platforms == nullptr and num_platforms == nullptr) {
712  return PI_ERROR_INVALID_VALUE;
713  }
714 
715  pi_result err = PI_SUCCESS;
716 
717  std::call_once(
718  initFlag,
719  [](pi_result &err) {
720  if (hipInit(0) != hipSuccess) {
721  numPlatforms = 0;
722  return;
723  }
724  int numDevices = 0;
725  hipError_t hipErrorCode = hipGetDeviceCount(&numDevices);
726  if (hipErrorCode == hipErrorNoDevice) {
727  numPlatforms = 0;
728  return;
729  }
730  err = PI_CHECK_ERROR(hipErrorCode);
731  if (numDevices == 0) {
732  numPlatforms = 0;
733  return;
734  }
735  try {
736  numPlatforms = numDevices;
737  platformIds.resize(numDevices);
738 
739  for (int i = 0; i < numDevices; ++i) {
740  hipDevice_t device;
741  err = PI_CHECK_ERROR(hipDeviceGet(&device, i));
742  platformIds[i].devices_.emplace_back(
743  new _pi_device{device, &platformIds[i]});
744  }
745  } catch (const std::bad_alloc &) {
746  // Signal out-of-memory situation
747  for (int i = 0; i < numDevices; ++i) {
748  platformIds[i].devices_.clear();
749  }
750  platformIds.clear();
751  err = PI_ERROR_OUT_OF_HOST_MEMORY;
752  } catch (...) {
753  // Clear and rethrow to allow retry
754  for (int i = 0; i < numDevices; ++i) {
755  platformIds[i].devices_.clear();
756  }
757  platformIds.clear();
758  throw;
759  }
760  },
761  err);
762 
763  if (num_platforms != nullptr) {
764  *num_platforms = numPlatforms;
765  }
766 
767  if (platforms != nullptr) {
768  for (unsigned i = 0; i < std::min(num_entries, numPlatforms); ++i) {
769  platforms[i] = &platformIds[i];
770  }
771  }
772 
773  return err;
774  } catch (pi_result err) {
775  return err;
776  } catch (...) {
777  return PI_ERROR_OUT_OF_RESOURCES;
778  }
779 }
780 
782  pi_platform_info param_name,
783  size_t param_value_size, void *param_value,
784  size_t *param_value_size_ret) {
785  assert(platform != nullptr);
786 
787  switch (param_name) {
789  return getInfo(param_value_size, param_value, param_value_size_ret,
790  "AMD HIP BACKEND");
792  return getInfo(param_value_size, param_value, param_value_size_ret,
793  "AMD Corporation");
795  return getInfo(param_value_size, param_value, param_value_size_ret,
796  "FULL PROFILE");
798  auto version = getHipVersionString();
799  return getInfo(param_value_size, param_value, param_value_size_ret,
800  version.c_str());
801  }
803  return getInfo(param_value_size, param_value, param_value_size_ret, "");
804  }
805  default:
807  }
808  cl::sycl::detail::pi::die("Platform info request not implemented");
809  return {};
810 }
811 
818  pi_uint32 num_entries, pi_device *devices,
819  pi_uint32 *num_devices) {
820 
821  pi_result err = PI_SUCCESS;
822  const bool askingForDefault = device_type == PI_DEVICE_TYPE_DEFAULT;
823  const bool askingForGPU = device_type & PI_DEVICE_TYPE_GPU;
824  const bool returnDevices = askingForDefault || askingForGPU;
825 
826  size_t numDevices = returnDevices ? platform->devices_.size() : 0;
827 
828  try {
829  if (num_devices) {
830  *num_devices = numDevices;
831  }
832 
833  if (returnDevices && devices) {
834  for (size_t i = 0; i < std::min(size_t(num_entries), numDevices); ++i) {
835  devices[i] = platform->devices_[i].get();
836  }
837  }
838 
839  return err;
840  } catch (pi_result err) {
841  return err;
842  } catch (...) {
843  return PI_ERROR_OUT_OF_RESOURCES;
844  }
845 }
846 
850  (void)device;
851  return PI_SUCCESS;
852 }
853 
855  size_t param_value_size, void *param_value,
856  size_t *param_value_size_ret) {
857 
858  switch (param_name) {
860  return getInfo(param_value_size, param_value, param_value_size_ret, 1);
862  return getInfo(param_value_size, param_value, param_value_size_ret,
863  context->get_device());
865  return getInfo(param_value_size, param_value, param_value_size_ret,
866  context->get_reference_count());
868  default:
870  }
871 
872  return PI_ERROR_OUT_OF_RESOURCES;
873 }
874 
876  assert(context != nullptr);
877  assert(context->get_reference_count() > 0);
878 
879  context->increment_reference_count();
880  return PI_SUCCESS;
881 }
882 
884  pi_context context, pi_context_extended_deleter function, void *user_data) {
885  context->set_extended_deleter(function, user_data);
886  return PI_SUCCESS;
887 }
888 
892  const pi_device_partition_property *properties,
893  pi_uint32 num_devices, pi_device *out_devices,
894  pi_uint32 *out_num_devices) {
895  (void)device;
896  (void)properties;
897  (void)num_devices;
898  (void)out_devices;
899  (void)out_num_devices;
900 
901  return PI_ERROR_INVALID_OPERATION;
902 }
903 
907  pi_device_binary *binaries,
908  pi_uint32 num_binaries,
909  pi_uint32 *selected_binary) {
910  (void)device;
911  if (!binaries) {
912  cl::sycl::detail::pi::die("No list of device images provided");
913  }
914  if (num_binaries < 1) {
915  cl::sycl::detail::pi::die("No binary images in the list");
916  }
917 
918  // Look for an image for the HIP target, and return the first one that is
919  // found
920 #if defined(__HIP_PLATFORM_AMD__)
921  const char *binary_type = __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN;
922 #elif defined(__HIP_PLATFORM_NVIDIA__)
923  const char *binary_type = __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64;
924 #else
925 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
926 #endif
927 
928  for (pi_uint32 i = 0; i < num_binaries; i++) {
929  if (strcmp(binaries[i]->DeviceTargetSpec, binary_type) == 0) {
930  *selected_binary = i;
931  return PI_SUCCESS;
932  }
933  }
934 
935  // No image can be loaded for the given device
936  return PI_ERROR_INVALID_BINARY;
937 }
938 
940  pi_program program,
941  const char *func_name,
942  pi_uint64 *func_pointer_ret) {
943  // Check if device passed is the same the device bound to the context
944  assert(device == program->get_context()->get_device());
945  assert(func_pointer_ret != nullptr);
946 
947  hipFunction_t func;
948  hipError_t ret = hipModuleGetFunction(&func, program->get(), func_name);
949  *func_pointer_ret = reinterpret_cast<pi_uint64>(func);
950  pi_result retError = PI_SUCCESS;
951 
952  if (ret != hipSuccess && ret != hipErrorNotFound)
953  retError = PI_CHECK_ERROR(ret);
954  if (ret == hipErrorNotFound) {
955  *func_pointer_ret = 0;
956  retError = PI_ERROR_INVALID_KERNEL_NAME;
957  }
958 
959  return retError;
960 }
961 
965  (void)device;
966  return PI_SUCCESS;
967 }
968 
970  size_t param_value_size, void *param_value,
971  size_t *param_value_size_ret) {
972 
973  static constexpr pi_uint32 max_work_item_dimensions = 3u;
974 
975  assert(device != nullptr);
976 
977  switch (param_name) {
978  case PI_DEVICE_INFO_TYPE: {
979  return getInfo(param_value_size, param_value, param_value_size_ret,
981  }
983 #if defined(__HIP_PLATFORM_AMD__)
984  pi_uint32 vendor_id = 4098u;
985 #elif defined(__HIP_PLATFORM_NVIDIA__)
986  pi_uint32 vendor_id = 4318u;
987 #else
988  pi_uint32 vendor_id = 0u;
989 #endif
990 
991  return getInfo(param_value_size, param_value, param_value_size_ret,
992  vendor_id);
993  }
995  int compute_units = 0;
997  hipDeviceGetAttribute(&compute_units,
998  hipDeviceAttributeMultiprocessorCount,
999  device->get()) == hipSuccess);
1000  cl::sycl::detail::pi::assertion(compute_units >= 0);
1001  return getInfo(param_value_size, param_value, param_value_size_ret,
1002  pi_uint32(compute_units));
1003  }
1005  return getInfo(param_value_size, param_value, param_value_size_ret,
1006  max_work_item_dimensions);
1007  }
1009  size_t return_sizes[max_work_item_dimensions];
1010 
1011  int max_x = 0, max_y = 0, max_z = 0;
1013  hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxBlockDimX,
1014  device->get()) == hipSuccess);
1015  cl::sycl::detail::pi::assertion(max_x >= 0);
1016 
1018  hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxBlockDimY,
1019  device->get()) == hipSuccess);
1020  cl::sycl::detail::pi::assertion(max_y >= 0);
1021 
1023  hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxBlockDimZ,
1024  device->get()) == hipSuccess);
1025  cl::sycl::detail::pi::assertion(max_z >= 0);
1026 
1027  return_sizes[0] = size_t(max_x);
1028  return_sizes[1] = size_t(max_y);
1029  return_sizes[2] = size_t(max_z);
1030  return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
1031  param_value_size_ret, return_sizes);
1032  }
1033 
1035  size_t return_sizes[max_work_item_dimensions];
1036  int max_x = 0, max_y = 0, max_z = 0;
1038  hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxGridDimX,
1039  device->get()) == hipSuccess);
1040  cl::sycl::detail::pi::assertion(max_x >= 0);
1041 
1043  hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxGridDimY,
1044  device->get()) == hipSuccess);
1045  cl::sycl::detail::pi::assertion(max_y >= 0);
1046 
1048  hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxGridDimZ,
1049  device->get()) == hipSuccess);
1050  cl::sycl::detail::pi::assertion(max_z >= 0);
1051 
1052  return_sizes[0] = size_t(max_x);
1053  return_sizes[1] = size_t(max_y);
1054  return_sizes[2] = size_t(max_z);
1055  return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
1056  param_value_size_ret, return_sizes);
1057  }
1058 
1060  int max_work_group_size = 0;
1062  hipDeviceGetAttribute(&max_work_group_size,
1063  hipDeviceAttributeMaxThreadsPerBlock,
1064  device->get()) == hipSuccess);
1065 
1066  cl::sycl::detail::pi::assertion(max_work_group_size >= 0);
1067 
1068  return getInfo(param_value_size, param_value, param_value_size_ret,
1069  size_t(max_work_group_size));
1070  }
1072  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1073  }
1075  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1076  }
1078  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1079  }
1081  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1082  }
1084  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1085  }
1087  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1088  }
1090  return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1091  }
1093  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1094  }
1096  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1097  }
1099  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1100  }
1102  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1103  }
1105  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1106  }
1108  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1109  }
1111  return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1112  }
1114  // Number of sub-groups = max block size / warp size + possible remainder
1115  int max_threads = 0;
1117  hipDeviceGetAttribute(&max_threads,
1118  hipDeviceAttributeMaxThreadsPerBlock,
1119  device->get()) == hipSuccess);
1120  int warpSize = 0;
1122  hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
1123  device->get()) == hipSuccess);
1124  int maxWarps = (max_threads + warpSize - 1) / warpSize;
1125  return getInfo(param_value_size, param_value, param_value_size_ret,
1126  static_cast<uint32_t>(maxWarps));
1127  }
1129  // Volta provides independent thread scheduling
1130  // TODO: Revisit for previous generation GPUs
1131  int major = 0;
1133  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor,
1134  device->get()) == hipSuccess);
1135  bool ifp = (major >= 7);
1136  return getInfo(param_value_size, param_value, param_value_size_ret, ifp);
1137  }
1139  int warpSize = 0;
1141  hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
1142  device->get()) == hipSuccess);
1143  size_t sizes[1] = {static_cast<size_t>(warpSize)};
1144  return getInfoArray<size_t>(1, param_value_size, param_value,
1145  param_value_size_ret, sizes);
1146  }
1148  int clock_freq = 0;
1150  hipDeviceGetAttribute(&clock_freq, hipDeviceAttributeClockRate,
1151  device->get()) == hipSuccess);
1152  cl::sycl::detail::pi::assertion(clock_freq >= 0);
1153  return getInfo(param_value_size, param_value, param_value_size_ret,
1154  pi_uint32(clock_freq) / 1000u);
1155  }
1157  auto bits = pi_uint32{std::numeric_limits<uintptr_t>::digits};
1158  return getInfo(param_value_size, param_value, param_value_size_ret, bits);
1159  }
1161  // Max size of memory object allocation in bytes.
1162  // The minimum value is max(min(1024 × 1024 ×
1163  // 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE),
1164  // 32 × 1024 × 1024) for devices that are not of type
1165  // CL_DEVICE_TYPE_HIPSTOM.
1166 
1167  size_t global = 0;
1168  cl::sycl::detail::pi::assertion(hipDeviceTotalMem(&global, device->get()) ==
1169  hipSuccess);
1170 
1171  auto quarter_global = static_cast<pi_uint32>(global / 4u);
1172 
1173  auto max_alloc = std::max(std::min(1024u * 1024u * 1024u, quarter_global),
1174  32u * 1024u * 1024u);
1175 
1176  return getInfo(param_value_size, param_value, param_value_size_ret,
1177  pi_uint64{max_alloc});
1178  }
1180  return getInfo(param_value_size, param_value, param_value_size_ret,
1181  PI_TRUE);
1182  }
1184  // This call doesn't match to HIP as it doesn't have images, but instead
1185  // surfaces and textures. No clear call in the HIP API to determine this,
1186  // but some searching found as of SM 2.x 128 are supported.
1187  return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1188  }
1190  // This call doesn't match to HIP as it doesn't have images, but instead
1191  // surfaces and textures. No clear call in the HIP API to determine this,
1192  // but some searching found as of SM 2.x 128 are supported.
1193  return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1194  }
1195 
1197  // Take the smaller of maximum surface and maximum texture height.
1198  int tex_height = 0;
1200  hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture2DHeight,
1201  device->get()) == hipSuccess);
1202  cl::sycl::detail::pi::assertion(tex_height >= 0);
1203  int surf_height = 0;
1205  hipDeviceGetAttribute(&surf_height,
1206  hipDeviceAttributeMaxTexture2DHeight,
1207  device->get()) == hipSuccess);
1208  cl::sycl::detail::pi::assertion(surf_height >= 0);
1209 
1210  int min = std::min(tex_height, surf_height);
1211 
1212  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1213  }
1215  // Take the smaller of maximum surface and maximum texture width.
1216  int tex_width = 0;
1218  hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture2DWidth,
1219  device->get()) == hipSuccess);
1220  cl::sycl::detail::pi::assertion(tex_width >= 0);
1221  int surf_width = 0;
1223  hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture2DWidth,
1224  device->get()) == hipSuccess);
1225  cl::sycl::detail::pi::assertion(surf_width >= 0);
1226 
1227  int min = std::min(tex_width, surf_width);
1228 
1229  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1230  }
1232  // Take the smaller of maximum surface and maximum texture height.
1233  int tex_height = 0;
1235  hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture3DHeight,
1236  device->get()) == hipSuccess);
1237  cl::sycl::detail::pi::assertion(tex_height >= 0);
1238  int surf_height = 0;
1240  hipDeviceGetAttribute(&surf_height,
1241  hipDeviceAttributeMaxTexture3DHeight,
1242  device->get()) == hipSuccess);
1243  cl::sycl::detail::pi::assertion(surf_height >= 0);
1244 
1245  int min = std::min(tex_height, surf_height);
1246 
1247  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1248  }
1250  // Take the smaller of maximum surface and maximum texture width.
1251  int tex_width = 0;
1253  hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture3DWidth,
1254  device->get()) == hipSuccess);
1255  cl::sycl::detail::pi::assertion(tex_width >= 0);
1256  int surf_width = 0;
1258  hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture3DWidth,
1259  device->get()) == hipSuccess);
1260  cl::sycl::detail::pi::assertion(surf_width >= 0);
1261 
1262  int min = std::min(tex_width, surf_width);
1263 
1264  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1265  }
1267  // Take the smaller of maximum surface and maximum texture depth.
1268  int tex_depth = 0;
1270  hipDeviceGetAttribute(&tex_depth, hipDeviceAttributeMaxTexture3DDepth,
1271  device->get()) == hipSuccess);
1272  cl::sycl::detail::pi::assertion(tex_depth >= 0);
1273  int surf_depth = 0;
1275  hipDeviceGetAttribute(&surf_depth, hipDeviceAttributeMaxTexture3DDepth,
1276  device->get()) == hipSuccess);
1277  cl::sycl::detail::pi::assertion(surf_depth >= 0);
1278 
1279  int min = std::min(tex_depth, surf_depth);
1280 
1281  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1282  }
1284  // Take the smaller of maximum surface and maximum texture width.
1285  int tex_width = 0;
1287  hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture1DWidth,
1288  device->get()) == hipSuccess);
1289  cl::sycl::detail::pi::assertion(tex_width >= 0);
1290  int surf_width = 0;
1292  hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture1DWidth,
1293  device->get()) == hipSuccess);
1294  cl::sycl::detail::pi::assertion(surf_width >= 0);
1295 
1296  int min = std::min(tex_width, surf_width);
1297 
1298  return getInfo(param_value_size, param_value, param_value_size_ret, min);
1299  }
1301  return getInfo(param_value_size, param_value, param_value_size_ret,
1302  size_t(0));
1303  }
1305  // This call is kind of meaningless for HIP, as samplers don't exist.
1306  // Closest thing is textures, which is 128.
1307  return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1308  }
1310  // __global__ function parameters are passed to the device via constant
1311  // memory and are limited to 4 KB.
1312  return getInfo(param_value_size, param_value, param_value_size_ret,
1313  size_t{4000u});
1314  }
1316  int mem_base_addr_align = 0;
1318  hipDeviceGetAttribute(&mem_base_addr_align,
1319  hipDeviceAttributeTextureAlignment,
1320  device->get()) == hipSuccess);
1321  // Multiply by 8 as clGetDeviceInfo returns this value in bits
1322  mem_base_addr_align *= 8;
1323  return getInfo(param_value_size, param_value, param_value_size_ret,
1324  mem_base_addr_align);
1325  }
1327  return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1328  }
1333  return getInfo(param_value_size, param_value, param_value_size_ret, config);
1334  }
1338  return getInfo(param_value_size, param_value, param_value_size_ret, config);
1339  }
1341  return getInfo(param_value_size, param_value, param_value_size_ret,
1343  }
1345  // The value is dohipmented for all existing GPUs in the HIP programming
1346  // guidelines, section "H.3.2. Global Memory".
1347  return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1348  }
1350  int cache_size = 0;
1352  hipDeviceGetAttribute(&cache_size, hipDeviceAttributeL2CacheSize,
1353  device->get()) == hipSuccess);
1354  cl::sycl::detail::pi::assertion(cache_size >= 0);
1355  // The L2 cache is global to the GPU.
1356  return getInfo(param_value_size, param_value, param_value_size_ret,
1357  pi_uint64(cache_size));
1358  }
1360  size_t bytes = 0;
1361  // Runtime API has easy access to this value, driver API info is scarse.
1362  cl::sycl::detail::pi::assertion(hipDeviceTotalMem(&bytes, device->get()) ==
1363  hipSuccess);
1364  return getInfo(param_value_size, param_value, param_value_size_ret,
1365  pi_uint64{bytes});
1366  }
1368  unsigned int constant_memory = 0;
1369 
1370  // hipDeviceGetAttribute takes a int*, however the size of the constant
1371  // memory on AMD GPU may be larger than what can fit in the positive part
1372  // of a signed integer, so use an unsigned integer and cast the pointer to
1373  // int*.
1375  hipDeviceGetAttribute(reinterpret_cast<int *>(&constant_memory),
1376  hipDeviceAttributeTotalConstantMemory,
1377  device->get()) == hipSuccess);
1378 
1379  return getInfo(param_value_size, param_value, param_value_size_ret,
1380  pi_uint64(constant_memory));
1381  }
1383  // TODO: is there a way to retrieve this from HIP driver API?
1384  // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
1385  // 1060 3GB
1386  return getInfo(param_value_size, param_value, param_value_size_ret, 9u);
1387  }
1389  return getInfo(param_value_size, param_value, param_value_size_ret,
1391  }
1393  // OpenCL's "local memory" maps most closely to HIP's "shared memory".
1394  // HIP has its own definition of "local memory", which maps to OpenCL's
1395  // "private memory".
1396  int local_mem_size = 0;
1398  hipDeviceGetAttribute(&local_mem_size,
1399  hipDeviceAttributeMaxSharedMemoryPerBlock,
1400  device->get()) == hipSuccess);
1401  cl::sycl::detail::pi::assertion(local_mem_size >= 0);
1402  return getInfo(param_value_size, param_value, param_value_size_ret,
1403  pi_uint64(local_mem_size));
1404  }
1406  int ecc_enabled = 0;
1408  hipDeviceGetAttribute(&ecc_enabled, hipDeviceAttributeEccEnabled,
1409  device->get()) == hipSuccess);
1410 
1411  cl::sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1));
1412  auto result = static_cast<pi_bool>(ecc_enabled);
1413  return getInfo(param_value_size, param_value, param_value_size_ret, result);
1414  }
1416  int is_integrated = 0;
1418  hipDeviceGetAttribute(&is_integrated, hipDeviceAttributeIntegrated,
1419  device->get()) == hipSuccess);
1420 
1421  cl::sycl::detail::pi::assertion((is_integrated == 0) |
1422  (is_integrated == 1));
1423  auto result = static_cast<pi_bool>(is_integrated);
1424  return getInfo(param_value_size, param_value, param_value_size_ret, result);
1425  }
1427  // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
1428  // 1060 3GB
1429  return getInfo(param_value_size, param_value, param_value_size_ret,
1430  size_t{1000u});
1431  }
1433  return getInfo(param_value_size, param_value, param_value_size_ret,
1434  PI_TRUE);
1435  }
1436  case PI_DEVICE_INFO_AVAILABLE: {
1437  return getInfo(param_value_size, param_value, param_value_size_ret,
1438  PI_TRUE);
1439  }
1441  return getInfo(param_value_size, param_value, param_value_size_ret,
1442  PI_TRUE);
1443  }
1445  return getInfo(param_value_size, param_value, param_value_size_ret,
1446  PI_TRUE);
1447  }
1449  return getInfo(param_value_size, param_value, param_value_size_ret,
1450  PI_TRUE);
1451  }
1453  auto capability = PI_DEVICE_EXEC_CAPABILITIES_KERNEL;
1454  return getInfo(param_value_size, param_value, param_value_size_ret,
1455  capability);
1456  }
1458  // The mandated minimum capability:
1459  auto capability =
1461  return getInfo(param_value_size, param_value, param_value_size_ret,
1462  capability);
1463  }
1465  // The mandated minimum capability:
1466  auto capability = PI_QUEUE_PROFILING_ENABLE;
1467  return getInfo(param_value_size, param_value, param_value_size_ret,
1468  capability);
1469  }
1471  // An empty string is returned if no built-in kernels are supported by the
1472  // device.
1473  return getInfo(param_value_size, param_value, param_value_size_ret, "");
1474  }
1475  case PI_DEVICE_INFO_PLATFORM: {
1476  return getInfo(param_value_size, param_value, param_value_size_ret,
1477  device->get_platform());
1478  }
1479  case PI_DEVICE_INFO_NAME: {
1480  static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u;
1481  char name[MAX_DEVICE_NAME_LENGTH];
1483  hipDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, device->get()) ==
1484  hipSuccess);
1485 
1486  // On AMD GPUs hipDeviceGetName returns an empty string, so return the arch
1487  // name instead, this is also what AMD OpenCL devices return.
1488  if (strlen(name) == 0) {
1489  hipDeviceProp_t props;
1491  hipGetDeviceProperties(&props, device->get()) == hipSuccess);
1492 
1493  return getInfoArray(strlen(props.gcnArchName) + 1, param_value_size,
1494  param_value, param_value_size_ret, props.gcnArchName);
1495  }
1496  return getInfoArray(strlen(name) + 1, param_value_size, param_value,
1497  param_value_size_ret, name);
1498  }
1499  case PI_DEVICE_INFO_VENDOR: {
1500  return getInfo(param_value_size, param_value, param_value_size_ret,
1501  "AMD Corporation");
1502  }
1504  auto version = getHipVersionString();
1505  return getInfo(param_value_size, param_value, param_value_size_ret,
1506  version.c_str());
1507  }
1508  case PI_DEVICE_INFO_PROFILE: {
1509  return getInfo(param_value_size, param_value, param_value_size_ret, "HIP");
1510  }
1512  return getInfo(param_value_size, param_value, param_value_size_ret,
1513  device->get_reference_count());
1514  }
1515  case PI_DEVICE_INFO_VERSION: {
1516  return getInfo(param_value_size, param_value, param_value_size_ret,
1517  "PI 0.0");
1518  }
1520  return getInfo(param_value_size, param_value, param_value_size_ret, "");
1521  }
1523  // TODO: Remove comment when HIP support native asserts.
1524  // DEVICELIB_ASSERT extension is set so fallback assert
1525  // postprocessing is NOP. HIP 4.3 docs indicate support for
1526  // native asserts are in progress
1527  std::string SupportedExtensions = "";
1528  SupportedExtensions += PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT;
1529  SupportedExtensions += " ";
1530 
1531  return getInfo(param_value_size, param_value, param_value_size_ret,
1532  SupportedExtensions.c_str());
1533  }
1535  // The minimum value for the FULL profile is 1 MB.
1536  return getInfo(param_value_size, param_value, param_value_size_ret,
1537  size_t{1024u});
1538  }
1540  return getInfo(param_value_size, param_value, param_value_size_ret,
1541  PI_TRUE);
1542  }
1544  return getInfo(param_value_size, param_value, param_value_size_ret,
1545  nullptr);
1546  }
1548  return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1549  }
1551  return getInfo(param_value_size, param_value, param_value_size_ret,
1552  static_cast<pi_device_partition_property>(0u));
1553  }
1555  return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1556  }
1558  return getInfo(param_value_size, param_value, param_value_size_ret,
1559  static_cast<pi_device_partition_property>(0u));
1560  }
1561 
1562  // Intel USM extensions
1563 
1565  // from cl_intel_unified_shared_memory: "The host memory access capabilities
1566  // apply to any host allocation."
1567  //
1568  // query if/how the device can access page-locked host memory, possibly
1569  // through PCIe, using the same pointer as the host
1570  pi_bitfield value = {};
1571  // if (getAttribute(device, HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) {
1572  // the device shares a unified address space with the host
1573  if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1574  // compute capability 6.x introduces operations that are atomic with
1575  // respect to other CPUs and GPUs in the system
1578  } else {
1579  // on GPU architectures with compute capability lower than 6.x, atomic
1580  // operations from the GPU to CPU memory will not be atomic with respect
1581  // to CPU initiated atomic operations
1583  }
1584  //}
1585  return getInfo(param_value_size, param_value, param_value_size_ret, value);
1586  }
1588  // from cl_intel_unified_shared_memory:
1589  // "The device memory access capabilities apply to any device allocation
1590  // associated with this device."
1591  //
1592  // query how the device can access memory allocated on the device itself (?)
1596  return getInfo(param_value_size, param_value, param_value_size_ret, value);
1597  }
1599  // from cl_intel_unified_shared_memory:
1600  // "The single device shared memory access capabilities apply to any shared
1601  // allocation associated with this device."
1602  //
1603  // query if/how the device can access managed memory associated to it
1604  pi_bitfield value = {};
1605  if (getAttribute(device, hipDeviceAttributeManagedMemory)) {
1606  // the device can allocate managed memory on this system
1608  }
1609  if (getAttribute(device, hipDeviceAttributeConcurrentManagedAccess)) {
1610  // the device can coherently access managed memory concurrently with the
1611  // CPU
1612  value |= PI_USM_CONCURRENT_ACCESS;
1613  if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1614  // compute capability 6.x introduces operations that are atomic with
1615  // respect to other CPUs and GPUs in the system
1617  }
1618  }
1619  return getInfo(param_value_size, param_value, param_value_size_ret, value);
1620  }
1622  // from cl_intel_unified_shared_memory:
1623  // "The cross-device shared memory access capabilities apply to any shared
1624  // allocation associated with this device, or to any shared memory
1625  // allocation on another device that also supports the same cross-device
1626  // shared memory access capability."
1627  //
1628  // query if/how the device can access managed memory associated to other
1629  // devices
1630  pi_bitfield value = {};
1631  if (getAttribute(device, hipDeviceAttributeManagedMemory)) {
1632  // the device can allocate managed memory on this system
1633  value |= PI_USM_ACCESS;
1634  }
1635  if (getAttribute(device, hipDeviceAttributeConcurrentManagedAccess)) {
1636  // all devices with the CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
1637  // attribute can coherently access managed memory concurrently with the
1638  // CPU
1639  value |= PI_USM_CONCURRENT_ACCESS;
1640  }
1641  if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1642  // compute capability 6.x introduces operations that are atomic with
1643  // respect to other CPUs and GPUs in the system
1644  if (value & PI_USM_ACCESS)
1645  value |= PI_USM_ATOMIC_ACCESS;
1646  if (value & PI_USM_CONCURRENT_ACCESS)
1648  }
1649  return getInfo(param_value_size, param_value, param_value_size_ret, value);
1650  }
1652  // from cl_intel_unified_shared_memory:
1653  // "The shared system memory access capabilities apply to any allocations
1654  // made by a system allocator, such as malloc or new."
1655  //
1656  // query if/how the device can access pageable host memory allocated by the
1657  // system allocator
1658  pi_bitfield value = {};
1659  if (getAttribute(device, hipDeviceAttributePageableMemoryAccess)) {
1660  // the link between the device and the host does not support native
1661  // atomic operations
1663  }
1664  return getInfo(param_value_size, param_value, param_value_size_ret, value);
1665  }
1666 
1667  // TODO: Implement.
1670  // TODO: Investigate if this information is available on HIP.
1681  return PI_ERROR_INVALID_VALUE;
1682 
1683  default:
1685  }
1686  cl::sycl::detail::pi::die("Device info request not implemented");
1687  return {};
1688 }
1689 
1697  pi_native_handle *nativeHandle) {
1698  *nativeHandle = static_cast<pi_native_handle>(device->get());
1699  return PI_SUCCESS;
1700 }
1701 
1713  pi_device *device) {
1714  (void)nativeHandle;
1715  (void)platform;
1716  (void)device;
1718  "Creation of PI device from native handle not implemented");
1719  return {};
1720 }
1721 
1722 /* Context APIs */
1723 
1743  pi_uint32 num_devices, const pi_device *devices,
1744  void (*pfn_notify)(const char *errinfo,
1745  const void *private_info,
1746  size_t cb, void *user_data),
1747  void *user_data, pi_context *retcontext) {
1748 
1749  assert(devices != nullptr);
1750  // TODO: How to implement context callback?
1751  assert(pfn_notify == nullptr);
1752  assert(user_data == nullptr);
1753  assert(num_devices == 1);
1754  // Need input context
1755  assert(retcontext != nullptr);
1756  pi_result errcode_ret = PI_SUCCESS;
1757 
1758  // Parse properties.
1759  bool property_hip_primary = false;
1760  while (properties && (0 != *properties)) {
1761  // Consume property ID.
1762  pi_context_properties id = *properties;
1763  ++properties;
1764  // Consume property value.
1765  pi_context_properties value = *properties;
1766  ++properties;
1767  switch (id) {
1769  assert(value == PI_FALSE || value == PI_TRUE);
1770  property_hip_primary = static_cast<bool>(value);
1771  break;
1772  default:
1773  // Unknown property.
1775  "Unknown piContextCreate property in property list");
1776  return PI_ERROR_INVALID_VALUE;
1777  }
1778  }
1779 
1780  std::unique_ptr<_pi_context> piContextPtr{nullptr};
1781  try {
1782  hipCtx_t current = nullptr;
1783 
1784  if (property_hip_primary) {
1785  // Use the HIP primary context and assume that we want to use it
1786  // immediately as we want to forge context switches.
1787  hipCtx_t Ctxt;
1788  errcode_ret =
1789  PI_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Ctxt, devices[0]->get()));
1790  piContextPtr = std::unique_ptr<_pi_context>(
1791  new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1792  errcode_ret = PI_CHECK_ERROR(hipCtxPushCurrent(Ctxt));
1793  } else {
1794  // Create a scoped context.
1795  hipCtx_t newContext;
1796  PI_CHECK_ERROR(hipCtxGetCurrent(&current));
1797  errcode_ret = PI_CHECK_ERROR(
1798  hipCtxCreate(&newContext, hipDeviceMapHost, devices[0]->get()));
1799  piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
1800  _pi_context::kind::user_defined, newContext, *devices});
1801  }
1802 
1803  // Use default stream to record base event counter
1804  PI_CHECK_ERROR(
1805  hipEventCreateWithFlags(&piContextPtr->evBase_, hipEventDefault));
1806  PI_CHECK_ERROR(hipEventRecord(piContextPtr->evBase_, 0));
1807 
1808  // For non-primary scoped contexts keep the last active on top of the stack
1809  // as `cuCtxCreate` replaces it implicitly otherwise.
1810  // Primary contexts are kept on top of the stack, so the previous context
1811  // is not queried and therefore not recovered.
1812  if (current != nullptr) {
1813  PI_CHECK_ERROR(hipCtxSetCurrent(current));
1814  }
1815 
1816  *retcontext = piContextPtr.release();
1817  } catch (pi_result err) {
1818  errcode_ret = err;
1819  } catch (...) {
1820  errcode_ret = PI_ERROR_OUT_OF_RESOURCES;
1821  }
1822  return errcode_ret;
1823 }
1824 
1826 
1827  assert(ctxt != nullptr);
1828 
1829  if (ctxt->decrement_reference_count() > 0) {
1830  return PI_SUCCESS;
1831  }
1832  ctxt->invoke_extended_deleters();
1833 
1834  std::unique_ptr<_pi_context> context{ctxt};
1835 
1836  PI_CHECK_ERROR(hipEventDestroy(context->evBase_));
1837 
1838  if (!ctxt->is_primary()) {
1839  hipCtx_t hipCtxt = ctxt->get();
1840  // hipCtxSynchronize is not supported for AMD platform so we can just
1841  // destroy the context, for NVIDIA make sure it's synchronized.
1842 #if defined(__HIP_PLATFORM_NVIDIA__)
1843  hipCtx_t current = nullptr;
1844  PI_CHECK_ERROR(hipCtxGetCurrent(&current));
1845  if (hipCtxt != current) {
1846  PI_CHECK_ERROR(hipCtxPushCurrent(hipCtxt));
1847  }
1848  PI_CHECK_ERROR(hipCtxSynchronize());
1849  PI_CHECK_ERROR(hipCtxGetCurrent(&current));
1850  if (hipCtxt == current) {
1851  PI_CHECK_ERROR(hipCtxPopCurrent(&current));
1852  }
1853 #endif
1854  return PI_CHECK_ERROR(hipCtxDestroy(hipCtxt));
1855  } else {
1856  // Primary context is not destroyed, but released
1857  hipDevice_t hipDev = ctxt->get_device()->get();
1858  hipCtx_t current;
1859  PI_CHECK_ERROR(hipCtxPopCurrent(&current));
1860  return PI_CHECK_ERROR(hipDevicePrimaryCtxRelease(hipDev));
1861  }
1862 
1863  hipCtx_t hipCtxt = ctxt->get();
1864  return PI_CHECK_ERROR(hipCtxDestroy(hipCtxt));
1865 }
1866 
1874  pi_native_handle *nativeHandle) {
1875  *nativeHandle = reinterpret_cast<pi_native_handle>(context->get());
1876  return PI_SUCCESS;
1877 }
1878 
1888  pi_uint32 num_devices,
1889  const pi_device *devices,
1890  bool ownNativeHandle,
1891  pi_context *context) {
1892  (void)nativeHandle;
1893  (void)num_devices;
1894  (void)devices;
1895  (void)ownNativeHandle;
1896  (void)context;
1898  "Creation of PI context from native handle not implemented");
1899  return {};
1900 }
1901 
1907  size_t size, void *host_ptr, pi_mem *ret_mem,
1908  const pi_mem_properties *properties) {
1909  // Need input memory object
1910  assert(ret_mem != nullptr);
1911  assert((properties == nullptr || *properties == 0) &&
1912  "no mem properties goes to HIP RT yet");
1913  // Currently, USE_HOST_PTR is not implemented using host register
1914  // since this triggers a weird segfault after program ends.
1915  // Setting this constant to true enables testing that behavior.
1916  const bool enableUseHostPtr = false;
1917  const bool performInitialCopy =
1918  (flags & PI_MEM_FLAGS_HOST_PTR_COPY) ||
1919  ((flags & PI_MEM_FLAGS_HOST_PTR_USE) && !enableUseHostPtr);
1920  pi_result retErr = PI_SUCCESS;
1921  pi_mem retMemObj = nullptr;
1922 
1923  try {
1924  ScopedContext active(context);
1925  void *ptr;
1928 
1929  if ((flags & PI_MEM_FLAGS_HOST_PTR_USE) && enableUseHostPtr) {
1930  retErr = PI_CHECK_ERROR(
1931  hipHostRegister(host_ptr, size, hipHostRegisterMapped));
1932  retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr, host_ptr, 0));
1934  } else if (flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) {
1935  retErr = PI_CHECK_ERROR(hipHostMalloc(&host_ptr, size));
1936  retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr, host_ptr, 0));
1938  } else {
1939  retErr = PI_CHECK_ERROR(hipMalloc(&ptr, size));
1940  if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1942  }
1943  }
1944 
1945  if (retErr == PI_SUCCESS) {
1946  pi_mem parentBuffer = nullptr;
1947 
1948  auto devPtr =
1949  reinterpret_cast<_pi_mem::mem_::mem_::buffer_mem_::native_type>(ptr);
1950  auto piMemObj = std::unique_ptr<_pi_mem>(new _pi_mem{
1951  context, parentBuffer, allocMode, devPtr, host_ptr, size});
1952  if (piMemObj != nullptr) {
1953  retMemObj = piMemObj.release();
1954  if (performInitialCopy) {
1955  // Operates on the default stream of the current HIP context.
1956  retErr = PI_CHECK_ERROR(hipMemcpyHtoD(devPtr, host_ptr, size));
1957  // Synchronize with default stream implicitly used by cuMemcpyHtoD
1958  // to make buffer data available on device before any other PI call
1959  // uses it.
1960  if (retErr == PI_SUCCESS) {
1961  hipStream_t defaultStream = 0;
1962  retErr = PI_CHECK_ERROR(hipStreamSynchronize(defaultStream));
1963  }
1964  }
1965  } else {
1966  retErr = PI_ERROR_OUT_OF_HOST_MEMORY;
1967  }
1968  }
1969  } catch (pi_result err) {
1970  retErr = err;
1971  } catch (...) {
1972  retErr = PI_ERROR_OUT_OF_RESOURCES;
1973  }
1974 
1975  *ret_mem = retMemObj;
1976 
1977  return retErr;
1978 }
1979 
1985  assert((memObj != nullptr) && "PI_ERROR_INVALID_MEM_OBJECTS");
1986 
1987  pi_result ret = PI_SUCCESS;
1988 
1989  try {
1990 
1991  // Do nothing if there are other references
1992  if (memObj->decrement_reference_count() > 0) {
1993  return PI_SUCCESS;
1994  }
1995 
1996  // make sure memObj is released in case PI_CHECK_ERROR throws
1997  std::unique_ptr<_pi_mem> uniqueMemObj(memObj);
1998 
1999  if (memObj->is_sub_buffer()) {
2000  return PI_SUCCESS;
2001  }
2002 
2003  ScopedContext active(uniqueMemObj->get_context());
2004 
2005  if (memObj->mem_type_ == _pi_mem::mem_type::buffer) {
2006  switch (uniqueMemObj->mem_.buffer_mem_.allocMode_) {
2009  ret = PI_CHECK_ERROR(
2010  hipFree((void *)uniqueMemObj->mem_.buffer_mem_.ptr_));
2011  break;
2013  ret = PI_CHECK_ERROR(
2014  hipHostUnregister(uniqueMemObj->mem_.buffer_mem_.hostPtr_));
2015  break;
2017  ret = PI_CHECK_ERROR(
2018  hipFreeHost(uniqueMemObj->mem_.buffer_mem_.hostPtr_));
2019  };
2020  }
2021 
2022  else if (memObj->mem_type_ == _pi_mem::mem_type::surface) {
2023  ret = PI_CHECK_ERROR(hipDestroySurfaceObject(
2024  uniqueMemObj->mem_.surface_mem_.get_surface()));
2025  auto array = uniqueMemObj->mem_.surface_mem_.get_array();
2026  ret = PI_CHECK_ERROR(hipFreeArray(array));
2027  }
2028 
2029  } catch (pi_result err) {
2030  ret = err;
2031  } catch (...) {
2032  ret = PI_ERROR_OUT_OF_RESOURCES;
2033  }
2034 
2035  if (ret != PI_SUCCESS) {
2036  // A reported HIP error is either an implementation or an asynchronous HIP
2037  // error for which it is unclear if the function that reported it succeeded
2038  // or not. Either way, the state of the program is compromised and likely
2039  // unrecoverable.
2041  "Unrecoverable program state reached in hip_piMemRelease");
2042  }
2043 
2044  return PI_SUCCESS;
2045 }
2046 
2052  pi_buffer_create_type buffer_create_type,
2053  void *buffer_create_info, pi_mem *memObj) {
2054  assert((parent_buffer != nullptr) && "PI_ERROR_INVALID_MEM_OBJECT");
2055  assert(parent_buffer->is_buffer() && "PI_ERROR_INVALID_MEM_OBJECTS");
2056  assert(!parent_buffer->is_sub_buffer() && "PI_ERROR_INVALID_MEM_OBJECT");
2057 
2058  // Default value for flags means PI_MEM_FLAGS_ACCCESS_RW.
2059  if (flags == 0) {
2060  flags = PI_MEM_FLAGS_ACCESS_RW;
2061  }
2062 
2063  assert((flags == PI_MEM_FLAGS_ACCESS_RW) && "PI_ERROR_INVALID_VALUE");
2064  assert((buffer_create_type == PI_BUFFER_CREATE_TYPE_REGION) &&
2065  "PI_ERROR_INVALID_VALUE");
2066  assert((buffer_create_info != nullptr) && "PI_ERROR_INVALID_VALUE");
2067  assert(memObj != nullptr);
2068 
2069  const auto bufferRegion =
2070  *reinterpret_cast<pi_buffer_region>(buffer_create_info);
2071  assert((bufferRegion.size != 0u) && "PI_ERROR_INVALID_BUFFER_SIZE");
2072 
2073  assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
2074  "Overflow");
2075  assert(((bufferRegion.origin + bufferRegion.size) <=
2076  parent_buffer->mem_.buffer_mem_.get_size()) &&
2077  "PI_ERROR_INVALID_BUFFER_SIZE");
2078  // Retained indirectly due to retaining parent buffer below.
2079  pi_context context = parent_buffer->context_;
2082 
2083  assert(parent_buffer->mem_.buffer_mem_.ptr_ !=
2086  parent_buffer->mem_.buffer_mem_.get_with_offset(bufferRegion.origin);
2087 
2088  void *hostPtr = nullptr;
2089  if (parent_buffer->mem_.buffer_mem_.hostPtr_) {
2090  hostPtr = static_cast<char *>(parent_buffer->mem_.buffer_mem_.hostPtr_) +
2091  bufferRegion.origin;
2092  }
2093 
2094  ReleaseGuard<pi_mem> releaseGuard(parent_buffer);
2095 
2096  std::unique_ptr<_pi_mem> retMemObj{nullptr};
2097  try {
2098  ScopedContext active(context);
2099 
2100  retMemObj = std::unique_ptr<_pi_mem>{new _pi_mem{
2101  context, parent_buffer, allocMode, ptr, hostPtr, bufferRegion.size}};
2102  } catch (pi_result err) {
2103  *memObj = nullptr;
2104  return err;
2105  } catch (...) {
2106  *memObj = nullptr;
2107  return PI_ERROR_OUT_OF_HOST_MEMORY;
2108  }
2109 
2110  releaseGuard.dismiss();
2111  *memObj = retMemObj.release();
2112  return PI_SUCCESS;
2113 }
2114 
2116  size_t expectedQuerySize, void *queryOutput,
2117  size_t *writtenQuerySize) {
2118  (void)memObj;
2119  (void)queriedInfo;
2120  (void)expectedQuerySize;
2121  (void)queryOutput;
2122  (void)writtenQuerySize;
2123 
2124  cl::sycl::detail::pi::die("hip_piMemGetInfo not implemented");
2125 }
2126 
2134  pi_native_handle *nativeHandle) {
2135 #if defined(__HIP_PLATFORM_NVIDIA__)
2137  sizeof(pi_native_handle)) {
2138  // Check that all the upper bits that cannot be represented by
2139  // pi_native_handle are empty.
2140  // NOTE: The following shift might trigger a warning, but the check in the
2141  // if above makes sure that this does not underflow.
2143  mem->mem_.buffer_mem_.get() >> (sizeof(pi_native_handle) * CHAR_BIT);
2144  if (upperBits) {
2145  // Return an error if any of the remaining bits is non-zero.
2146  return PI_ERROR_INVALID_MEM_OBJECT;
2147  }
2148  }
2149  *nativeHandle = static_cast<pi_native_handle>(mem->mem_.buffer_mem_.get());
2150 #elif defined(__HIP_PLATFORM_AMD__)
2151  *nativeHandle =
2152  reinterpret_cast<pi_native_handle>(mem->mem_.buffer_mem_.get());
2153 #else
2154 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
2155 #endif
2156  return PI_SUCCESS;
2157 }
2158 
2172  bool ownNativeHandle,
2173  pi_mem *mem) {
2174  (void)nativeHandle;
2175  (void)context;
2176  (void)ownNativeHandle;
2177  (void)mem;
2178 
2180  "Creation of PI mem from native handle not implemented");
2181  return {};
2182 }
2183 
2191  pi_queue_properties properties, pi_queue *queue) {
2192  try {
2193  pi_result err = PI_SUCCESS;
2194 
2195  std::unique_ptr<_pi_queue> queueImpl{nullptr};
2196 
2197  if (context->get_device() != device) {
2198  *queue = nullptr;
2199  return PI_ERROR_INVALID_DEVICE;
2200  }
2201 
2202  ScopedContext active(context);
2203 
2204  hipStream_t hipStream;
2205 
2206  err = PI_CHECK_ERROR(hipStreamCreate(&hipStream));
2207  if (err != PI_SUCCESS) {
2208  return err;
2209  }
2210 
2211  queueImpl = std::unique_ptr<_pi_queue>(
2212  new _pi_queue{hipStream, context, device, properties});
2213 
2214  *queue = queueImpl.release();
2215 
2216  return PI_SUCCESS;
2217  } catch (pi_result err) {
2218 
2219  return err;
2220 
2221  } catch (...) {
2222 
2223  return PI_ERROR_OUT_OF_RESOURCES;
2224  }
2225 }
2226 
2228  size_t param_value_size, void *param_value,
2229  size_t *param_value_size_ret) {
2230  assert(command_queue != nullptr);
2231 
2232  switch (param_name) {
2233  case PI_QUEUE_INFO_CONTEXT:
2234  return getInfo(param_value_size, param_value, param_value_size_ret,
2235  command_queue->context_);
2236  case PI_QUEUE_INFO_DEVICE:
2237  return getInfo(param_value_size, param_value, param_value_size_ret,
2238  command_queue->device_);
2240  return getInfo(param_value_size, param_value, param_value_size_ret,
2241  command_queue->get_reference_count());
2243  return getInfo(param_value_size, param_value, param_value_size_ret,
2244  command_queue->properties_);
2245  default:
2247  }
2248  cl::sycl::detail::pi::die("Queue info request not implemented");
2249  return {};
2250 }
2251 
2253  assert(command_queue != nullptr);
2254  assert(command_queue->get_reference_count() > 0);
2255 
2256  command_queue->increment_reference_count();
2257  return PI_SUCCESS;
2258 }
2259 
2261  assert(command_queue != nullptr);
2262 
2263  if (command_queue->decrement_reference_count() > 0) {
2264  return PI_SUCCESS;
2265  }
2266 
2267  try {
2268  std::unique_ptr<_pi_queue> queueImpl(command_queue);
2269 
2270  ScopedContext active(command_queue->get_context());
2271 
2272  auto stream = queueImpl->stream_;
2273  PI_CHECK_ERROR(hipStreamSynchronize(stream));
2274  PI_CHECK_ERROR(hipStreamDestroy(stream));
2275 
2276  return PI_SUCCESS;
2277  } catch (pi_result err) {
2278  return err;
2279  } catch (...) {
2280  return PI_ERROR_OUT_OF_RESOURCES;
2281  }
2282 }
2283 
2285 
2286  // set default result to a negative result (avoid false-positve tests)
2287  pi_result result = PI_ERROR_OUT_OF_HOST_MEMORY;
2288 
2289  try {
2290 
2291  assert(command_queue !=
2292  nullptr); // need PI_ERROR_INVALID_EXTERNAL_HANDLE error code
2293  ScopedContext active(command_queue->get_context());
2294  result = PI_CHECK_ERROR(hipStreamSynchronize(command_queue->stream_));
2295 
2296  } catch (pi_result err) {
2297 
2298  result = err;
2299 
2300  } catch (...) {
2301 
2302  result = PI_ERROR_OUT_OF_RESOURCES;
2303  }
2304 
2305  return result;
2306 }
2307 
2308 // There is no HIP counterpart for queue flushing and we don't run into the
2309 // same problem of having to flush cross-queue dependencies as some of the
2310 // other plugins, so it can be left as no-op.
2312  (void)command_queue;
2313  return PI_SUCCESS;
2314 }
2315 
2323  pi_native_handle *nativeHandle) {
2324  *nativeHandle = reinterpret_cast<pi_native_handle>(queue->get());
2325  return PI_SUCCESS;
2326 }
2327 
2342  pi_device device,
2343  bool ownNativeHandle,
2344  pi_queue *queue) {
2345  (void)nativeHandle;
2346  (void)context;
2347  (void)device;
2348  (void)queue;
2349  (void)ownNativeHandle;
2351  "Creation of PI queue from native handle not implemented");
2352  return {};
2353 }
2354 
2356  pi_bool blocking_write, size_t offset,
2357  size_t size, void *ptr,
2358  pi_uint32 num_events_in_wait_list,
2359  const pi_event *event_wait_list,
2360  pi_event *event) {
2361 
2362  assert(buffer != nullptr);
2363  assert(command_queue != nullptr);
2364  pi_result retErr = PI_SUCCESS;
2365  hipStream_t hipStream = command_queue->get();
2366  std::unique_ptr<_pi_event> retImplEv{nullptr};
2367 
2368  try {
2369  ScopedContext active(command_queue->get_context());
2370 
2371  retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
2372  event_wait_list, nullptr);
2373 
2374  if (event) {
2375  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
2376  PI_COMMAND_TYPE_MEM_BUFFER_WRITE, command_queue));
2377  retImplEv->start();
2378  }
2379 
2380  retErr = PI_CHECK_ERROR(
2381  hipMemcpyHtoDAsync(buffer->mem_.buffer_mem_.get_with_offset(offset),
2382  ptr, size, hipStream));
2383 
2384  if (event) {
2385  retErr = retImplEv->record();
2386  }
2387 
2388  if (blocking_write) {
2389  retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
2390  }
2391 
2392  if (event) {
2393  *event = retImplEv.release();
2394  }
2395  } catch (pi_result err) {
2396  retErr = err;
2397  }
2398  return retErr;
2399 }
2400 
2402  pi_bool blocking_read, size_t offset,
2403  size_t size, void *ptr,
2404  pi_uint32 num_events_in_wait_list,
2405  const pi_event *event_wait_list,
2406  pi_event *event) {
2407 
2408  assert(buffer != nullptr);
2409  assert(command_queue != nullptr);
2410  pi_result retErr = PI_SUCCESS;
2411  hipStream_t hipStream = command_queue->get();
2412  std::unique_ptr<_pi_event> retImplEv{nullptr};
2413 
2414  try {
2415  ScopedContext active(command_queue->get_context());
2416 
2417  retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
2418  event_wait_list, nullptr);
2419 
2420  if (event) {
2421  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
2422  PI_COMMAND_TYPE_MEM_BUFFER_READ, command_queue));
2423  retImplEv->start();
2424  }
2425 
2426  retErr = PI_CHECK_ERROR(hipMemcpyDtoHAsync(
2427  ptr, buffer->mem_.buffer_mem_.get_with_offset(offset), size,
2428  hipStream));
2429 
2430  if (event) {
2431  retErr = retImplEv->record();
2432  }
2433 
2434  if (blocking_read) {
2435  retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
2436  }
2437 
2438  if (event) {
2439  *event = retImplEv.release();
2440  }
2441 
2442  } catch (pi_result err) {
2443  retErr = err;
2444  }
2445  return retErr;
2446 }
2447 
2448 pi_result hip_piEventsWait(pi_uint32 num_events, const pi_event *event_list) {
2449 
2450  try {
2451  assert(num_events != 0);
2452  assert(event_list);
2453  if (num_events == 0) {
2454  return PI_ERROR_INVALID_VALUE;
2455  }
2456 
2457  if (!event_list) {
2458  return PI_ERROR_INVALID_EVENT;
2459  }
2460 
2461  auto context = event_list[0]->get_context();
2462  ScopedContext active(context);
2463 
2464  auto waitFunc = [context](pi_event event) -> pi_result {
2465  if (!event) {
2466  return PI_ERROR_INVALID_EVENT;
2467  }
2468 
2469  if (event->get_context() != context) {
2470  return PI_ERROR_INVALID_CONTEXT;
2471  }
2472 
2473  return event->wait();
2474  };
2475  return forLatestEvents(event_list, num_events, waitFunc);
2476  } catch (pi_result err) {
2477  return err;
2478  } catch (...) {
2479  return PI_ERROR_OUT_OF_RESOURCES;
2480  }
2481 }
2482 
2483 pi_result hip_piKernelCreate(pi_program program, const char *kernel_name,
2484  pi_kernel *kernel) {
2485  assert(kernel != nullptr);
2486  assert(program != nullptr);
2487 
2488  pi_result retErr = PI_SUCCESS;
2489  std::unique_ptr<_pi_kernel> retKernel{nullptr};
2490 
2491  try {
2492  ScopedContext active(program->get_context());
2493 
2494  hipFunction_t hipFunc;
2495  retErr = PI_CHECK_ERROR(
2496  hipModuleGetFunction(&hipFunc, program->get(), kernel_name));
2497 
2498  std::string kernel_name_woffset = std::string(kernel_name) + "_with_offset";
2499  hipFunction_t hipFuncWithOffsetParam;
2500  hipError_t offsetRes = hipModuleGetFunction(
2501  &hipFuncWithOffsetParam, program->get(), kernel_name_woffset.c_str());
2502 
2503  // If there is no kernel with global offset parameter we mark it as missing
2504  if (offsetRes == hipErrorNotFound) {
2505  hipFuncWithOffsetParam = nullptr;
2506  } else {
2507  retErr = PI_CHECK_ERROR(offsetRes);
2508  }
2509 
2510  retKernel = std::unique_ptr<_pi_kernel>(
2511  new _pi_kernel{hipFunc, hipFuncWithOffsetParam, kernel_name, program,
2512  program->get_context()});
2513  } catch (pi_result err) {
2514  retErr = err;
2515  } catch (...) {
2516  retErr = PI_ERROR_OUT_OF_HOST_MEMORY;
2517  }
2518 
2519  *kernel = retKernel.release();
2520  return retErr;
2521 }
2522 
2524  size_t arg_size, const void *arg_value) {
2525 
2526  assert(kernel != nullptr);
2527  pi_result retErr = PI_SUCCESS;
2528  try {
2529  if (arg_value) {
2530  kernel->set_kernel_arg(arg_index, arg_size, arg_value);
2531  } else {
2532  kernel->set_kernel_local_arg(arg_index, arg_size);
2533  }
2534  } catch (pi_result err) {
2535  retErr = err;
2536  }
2537  return retErr;
2538 }
2539 
2541  const pi_mem *arg_value) {
2542 
2543  assert(kernel != nullptr);
2544  assert(arg_value != nullptr);
2545 
2546  pi_result retErr = PI_SUCCESS;
2547  try {
2548  pi_mem arg_mem = *arg_value;
2549 
2550  if (arg_mem->mem_type_ == _pi_mem::mem_type::surface) {
2551  auto array = arg_mem->mem_.surface_mem_.get_array();
2552  hipArray_Format Format;
2553  size_t NumChannels;
2554  getArrayDesc(array, Format, NumChannels);
2555  if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
2556  Format != HIP_AD_FORMAT_SIGNED_INT32 &&
2557  Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) {
2559  "PI HIP kernels only support images with channel types int32, "
2560  "uint32, float, and half.");
2561  }
2562  hipSurfaceObject_t hipSurf = arg_mem->mem_.surface_mem_.get_surface();
2563  kernel->set_kernel_arg(arg_index, sizeof(hipSurf), (void *)&hipSurf);
2564  } else
2565 
2566  {
2567  void *hipPtr = arg_mem->mem_.buffer_mem_.get_void();
2568  kernel->set_kernel_arg(arg_index, sizeof(void *), (void *)&hipPtr);
2569  }
2570  } catch (pi_result err) {
2571  retErr = err;
2572  }
2573  return retErr;
2574 }
2575 
2577  const pi_sampler *arg_value) {
2578 
2579  assert(kernel != nullptr);
2580  assert(arg_value != nullptr);
2581 
2582  pi_result retErr = PI_SUCCESS;
2583  try {
2584  pi_uint32 samplerProps = (*arg_value)->props_;
2585  kernel->set_kernel_arg(arg_index, sizeof(pi_uint32), (void *)&samplerProps);
2586  } catch (pi_result err) {
2587  retErr = err;
2588  }
2589  return retErr;
2590 }
2591 
2593  pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
2594  const size_t *global_work_offset, const size_t *global_work_size,
2595  const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
2596  const pi_event *event_wait_list, pi_event *event) {
2597 
2598  // Preconditions
2599  assert(command_queue != nullptr);
2600  assert(command_queue->get_context() == kernel->get_context());
2601  assert(kernel != nullptr);
2602  assert(global_work_offset != nullptr);
2603  assert(work_dim > 0);
2604  assert(work_dim < 4);
2605 
2606  // Set the number of threads per block to the number of threads per warp
2607  // by default unless user has provided a better number
2608  size_t threadsPerBlock[3] = {32u, 1u, 1u};
2609  size_t maxWorkGroupSize = 0u;
2610  size_t maxThreadsPerBlock[3] = {};
2611  bool providedLocalWorkGroupSize = (local_work_size != nullptr);
2612 
2613  {
2614  pi_result retError = hip_piDeviceGetInfo(
2616  sizeof(maxThreadsPerBlock), maxThreadsPerBlock, nullptr);
2617  assert(retError == PI_SUCCESS);
2618  (void)retError;
2619 
2620  retError = hip_piDeviceGetInfo(
2622  sizeof(maxWorkGroupSize), &maxWorkGroupSize, nullptr);
2623  assert(retError == PI_SUCCESS);
2624  // The maxWorkGroupsSize = 1024 for AMD GPU
2625  // The maxThreadsPerBlock = {1024, 1024, 1024}
2626 
2627  if (providedLocalWorkGroupSize) {
2628  auto isValid = [&](int dim) {
2629  if (local_work_size[dim] > maxThreadsPerBlock[dim])
2630  return PI_ERROR_INVALID_WORK_ITEM_SIZE;
2631  // Checks that local work sizes are a divisor of the global work sizes
2632  // which includes that the local work sizes are neither larger than the
2633  // global work sizes and not 0.
2634  if (0u == local_work_size[dim])
2635  return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2636  if (0u != (global_work_size[dim] % local_work_size[dim]))
2637  return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2638  threadsPerBlock[dim] = local_work_size[dim];
2639  return PI_SUCCESS;
2640  };
2641 
2642  for (size_t dim = 0; dim < work_dim; dim++) {
2643  auto err = isValid(dim);
2644  if (err != PI_SUCCESS)
2645  return err;
2646  }
2647  } else {
2648  simpleGuessLocalWorkSize(threadsPerBlock, global_work_size,
2649  maxThreadsPerBlock, kernel);
2650  }
2651  }
2652 
2653  if (maxWorkGroupSize <
2654  size_t(threadsPerBlock[0] * threadsPerBlock[1] * threadsPerBlock[2])) {
2655  return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2656  }
2657 
2658  size_t blocksPerGrid[3] = {1u, 1u, 1u};
2659 
2660  for (size_t i = 0; i < work_dim; i++) {
2661  blocksPerGrid[i] =
2662  (global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
2663  }
2664 
2665  pi_result retError = PI_SUCCESS;
2666  std::unique_ptr<_pi_event> retImplEv{nullptr};
2667 
2668  try {
2669  ScopedContext active(command_queue->get_context());
2670  hipStream_t hipStream = command_queue->get();
2671  hipFunction_t hipFunc = kernel->get();
2672 
2673  retError = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
2674  event_wait_list, nullptr);
2675 
2676  // Set the implicit global offset parameter if kernel has offset variant
2677  if (kernel->get_with_offset_parameter()) {
2678  std::uint32_t hip_implicit_offset[3] = {0, 0, 0};
2679  if (global_work_offset) {
2680  for (size_t i = 0; i < work_dim; i++) {
2681  hip_implicit_offset[i] =
2682  static_cast<std::uint32_t>(global_work_offset[i]);
2683  if (global_work_offset[i] != 0) {
2684  hipFunc = kernel->get_with_offset_parameter();
2685  }
2686  }
2687  }
2688  kernel->set_implicit_offset_arg(sizeof(hip_implicit_offset),
2689  hip_implicit_offset);
2690  }
2691 
2692  auto argIndices = kernel->get_arg_indices();
2693 
2694  if (event) {
2695  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
2696  PI_COMMAND_TYPE_NDRANGE_KERNEL, command_queue));
2697  retImplEv->start();
2698  }
2699 
2700  retError = PI_CHECK_ERROR(hipModuleLaunchKernel(
2701  hipFunc, blocksPerGrid[0], blocksPerGrid[1], blocksPerGrid[2],
2702  threadsPerBlock[0], threadsPerBlock[1], threadsPerBlock[2],
2703  kernel->get_local_size(), hipStream, argIndices.data(), nullptr));
2704 
2705  kernel->clear_local_size();
2706  if (event) {
2707  retError = retImplEv->record();
2708  }
2709 
2710  if (event) {
2711  *event = retImplEv.release();
2712  }
2713  } catch (pi_result err) {
2714  retError = err;
2715  }
2716  return retError;
2717 }
2718 
2720 pi_result
2721 hip_piEnqueueNativeKernel(pi_queue queue, void (*user_func)(void *), void *args,
2722  size_t cb_args, pi_uint32 num_mem_objects,
2723  const pi_mem *mem_list, const void **args_mem_loc,
2724  pi_uint32 num_events_in_wait_list,
2725  const pi_event *event_wait_list, pi_event *event) {
2726  (void)queue;
2727  (void)user_func;
2728  (void)args;
2729  (void)cb_args;
2730  (void)num_mem_objects;
2731  (void)mem_list;
2732  (void)args_mem_loc;
2733  (void)num_events_in_wait_list;
2734  (void)event_wait_list;
2735  (void)event;
2736 
2737  cl::sycl::detail::pi::die("Not implemented in HIP backend");
2738  return {};
2739 }
2740 
2742 
2744  const pi_image_format *image_format,
2745  const pi_image_desc *image_desc, void *host_ptr,
2746  pi_mem *ret_mem) {
2747 
2748  // Need input memory object
2749  assert(ret_mem != nullptr);
2750  const bool performInitialCopy = (flags & PI_MEM_FLAGS_HOST_PTR_COPY) ||
2751  ((flags & PI_MEM_FLAGS_HOST_PTR_USE));
2752  pi_result retErr = PI_SUCCESS;
2753 
2754  // We only support RBGA channel order
2755  // TODO: check SYCL CTS and spec. May also have to support BGRA
2756  if (image_format->image_channel_order !=
2759  "hip_piMemImageCreate only supports RGBA channel order");
2760  }
2761 
2762  // We have to use cuArray3DCreate, which has some caveats. The height and
2763  // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc gives
2764  // a minimum value of 1, so we need to convert the answer.
2765  HIP_ARRAY3D_DESCRIPTOR array_desc;
2766  array_desc.NumChannels = 4; // Only support 4 channel image
2767  array_desc.Flags = 0; // No flags required
2768  array_desc.Width = image_desc->image_width;
2769  if (image_desc->image_type == PI_MEM_TYPE_IMAGE1D) {
2770  array_desc.Height = 0;
2771  array_desc.Depth = 0;
2772  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE2D) {
2773  array_desc.Height = image_desc->image_height;
2774  array_desc.Depth = 0;
2775  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE3D) {
2776  array_desc.Height = image_desc->image_height;
2777  array_desc.Depth = image_desc->image_depth;
2778  }
2779 
2780  // We need to get this now in bytes for calculating the total image size later
2781  size_t pixel_type_size_bytes;
2782 
2783  switch (image_format->image_channel_data_type) {
2786  array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT8;
2787  pixel_type_size_bytes = 1;
2788  break;
2790  array_desc.Format = HIP_AD_FORMAT_SIGNED_INT8;
2791  pixel_type_size_bytes = 1;
2792  break;
2795  array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT16;
2796  pixel_type_size_bytes = 2;
2797  break;
2799  array_desc.Format = HIP_AD_FORMAT_SIGNED_INT16;
2800  pixel_type_size_bytes = 2;
2801  break;
2803  array_desc.Format = HIP_AD_FORMAT_HALF;
2804  pixel_type_size_bytes = 2;
2805  break;
2807  array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT32;
2808  pixel_type_size_bytes = 4;
2809  break;
2811  array_desc.Format = HIP_AD_FORMAT_SIGNED_INT32;
2812  pixel_type_size_bytes = 4;
2813  break;
2815  array_desc.Format = HIP_AD_FORMAT_FLOAT;
2816  pixel_type_size_bytes = 4;
2817  break;
2818  default:
2820  "hip_piMemImageCreate given unsupported image_channel_data_type");
2821  }
2822 
2823  // When a dimension isn't used image_desc has the size set to 1
2824  size_t pixel_size_bytes =
2825  pixel_type_size_bytes * 4; // 4 is the only number of channels we support
2826  size_t image_size_bytes = pixel_size_bytes * image_desc->image_width *
2827  image_desc->image_height * image_desc->image_depth;
2828 
2829  ScopedContext active(context);
2830  hipArray *image_array;
2831  retErr = PI_CHECK_ERROR(hipArray3DCreate(
2832  reinterpret_cast<hipCUarray *>(&image_array), &array_desc));
2833 
2834  try {
2835  if (performInitialCopy) {
2836  // We have to use a different copy function for each image dimensionality
2837  if (image_desc->image_type == PI_MEM_TYPE_IMAGE1D) {
2838  retErr = PI_CHECK_ERROR(
2839  hipMemcpyHtoA(image_array, 0, host_ptr, image_size_bytes));
2840  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE2D) {
2841  hip_Memcpy2D cpy_desc;
2842  memset(&cpy_desc, 0, sizeof(cpy_desc));
2843  cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
2844  cpy_desc.srcHost = host_ptr;
2845  cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
2846  cpy_desc.dstArray = reinterpret_cast<hipCUarray>(image_array);
2847  cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->image_width;
2848  cpy_desc.Height = image_desc->image_height;
2849  retErr = PI_CHECK_ERROR(hipMemcpyParam2D(&cpy_desc));
2850  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE3D) {
2851  HIP_MEMCPY3D cpy_desc;
2852  memset(&cpy_desc, 0, sizeof(cpy_desc));
2853  cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
2854  cpy_desc.srcHost = host_ptr;
2855  cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
2856  cpy_desc.dstArray = reinterpret_cast<hipCUarray>(image_array);
2857  cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->image_width;
2858  cpy_desc.Height = image_desc->image_height;
2859  cpy_desc.Depth = image_desc->image_depth;
2860  retErr = PI_CHECK_ERROR(hipDrvMemcpy3D(&cpy_desc));
2861  }
2862  }
2863 
2864  // HIP_RESOURCE_DESC is a union of different structs, shown here
2865  // We need to fill it as described here to use it for a surface or texture
2866  // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and
2867  // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array
2868  // handle.
2869  // HIP_RESOURCE_DESC::flags must be set to zero
2870 
2871  hipResourceDesc image_res_desc;
2872  image_res_desc.res.array.array = image_array;
2873  image_res_desc.resType = hipResourceTypeArray;
2874 
2875  hipSurfaceObject_t surface;
2876  retErr = PI_CHECK_ERROR(hipCreateSurfaceObject(&surface, &image_res_desc));
2877 
2878  auto piMemObj = std::unique_ptr<_pi_mem>(new _pi_mem{
2879  context, image_array, surface, image_desc->image_type, host_ptr});
2880 
2881  if (piMemObj == nullptr) {
2882  return PI_ERROR_OUT_OF_HOST_MEMORY;
2883  }
2884 
2885  *ret_mem = piMemObj.release();
2886  } catch (pi_result err) {
2887  PI_CHECK_ERROR(hipFreeArray(image_array));
2888  return err;
2889  } catch (...) {
2890  PI_CHECK_ERROR(hipFreeArray(image_array));
2891  return PI_ERROR_UNKNOWN;
2892  }
2893  return retErr;
2894 }
2895 
2898  size_t param_value_size, void *param_value,
2899  size_t *param_value_size_ret) {
2900  (void)image;
2901  (void)param_name;
2902  (void)param_value_size;
2903  (void)param_value;
2904  (void)param_value_size_ret;
2905 
2906  cl::sycl::detail::pi::die("hip_piMemImageGetInfo not implemented");
2907  return {};
2908 }
2909 
2911  assert(mem != nullptr);
2912  assert(mem->get_reference_count() > 0);
2914  return PI_SUCCESS;
2915 }
2916 
2921  const char **strings,
2922  const size_t *lengths,
2923  pi_program *program) {
2924  (void)context;
2925  (void)count;
2926  (void)strings;
2927  (void)lengths;
2928  (void)program;
2929 
2931  "hip_piclProgramCreateWithSource not implemented");
2932  return PI_ERROR_INVALID_OPERATION;
2933 }
2934 
2940  const pi_device *device_list, const char *options,
2941  void (*pfn_notify)(pi_program program,
2942  void *user_data),
2943  void *user_data) {
2944 
2945  assert(program != nullptr);
2946  assert(num_devices == 1 || num_devices == 0);
2947  assert(device_list != nullptr || num_devices == 0);
2948  assert(pfn_notify == nullptr);
2949  assert(user_data == nullptr);
2950  pi_result retError = PI_SUCCESS;
2951 
2952  try {
2953  ScopedContext active(program->get_context());
2954 
2955  program->build_program(options);
2956 
2957  } catch (pi_result err) {
2958  retError = err;
2959  }
2960  return retError;
2961 }
2962 
2965  pi_program *res_program) {
2966  (void)context;
2967  (void)il;
2968  (void)length;
2969  (void)res_program;
2970 
2971  cl::sycl::detail::pi::die("hip_piProgramCreate not implemented");
2972  return {};
2973 }
2974 
2982  pi_context context, pi_uint32 num_devices, const pi_device *device_list,
2983  const size_t *lengths, const unsigned char **binaries,
2984  size_t num_metadata_entries, const pi_device_binary_property *metadata,
2985  pi_int32 *binary_status, pi_program *program) {
2986  (void)num_metadata_entries;
2987  (void)metadata;
2988  (void)binary_status;
2989 
2990  assert(context != nullptr);
2991  assert(binaries != nullptr);
2992  assert(program != nullptr);
2993  assert(device_list != nullptr);
2994  assert(num_devices == 1 && "HIP contexts are for a single device");
2995  assert((context->get_device()->get() == device_list[0]->get()) &&
2996  "Mismatch between devices context and passed context when creating "
2997  "program from binary");
2998 
2999  pi_result retError = PI_SUCCESS;
3000 
3001  std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
3002 
3003  // TODO: Set metadata here and use reqd_work_group_size information.
3004  // See cuda_piProgramCreateWithBinary
3005 
3006  const bool has_length = (lengths != nullptr);
3007  size_t length = has_length
3008  ? lengths[0]
3009  : strlen(reinterpret_cast<const char *>(binaries[0])) + 1;
3010 
3011  assert(length != 0);
3012 
3013  retProgram->set_binary(reinterpret_cast<const char *>(binaries[0]), length);
3014 
3015  *program = retProgram.release();
3016 
3017  return retError;
3018 }
3019 
3021  size_t param_value_size, void *param_value,
3022  size_t *param_value_size_ret) {
3023  assert(program != nullptr);
3024 
3025  switch (param_name) {
3027  return getInfo(param_value_size, param_value, param_value_size_ret,
3028  program->get_reference_count());
3030  return getInfo(param_value_size, param_value, param_value_size_ret,
3031  program->context_);
3033  return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
3035  return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3036  &program->context_->deviceId_);
3038  return getInfo(param_value_size, param_value, param_value_size_ret,
3039  program->binary_);
3041  return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3042  &program->binarySizeInBytes_);
3044  return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3045  &program->binary_);
3047  return getInfo(param_value_size, param_value, param_value_size_ret,
3048  getKernelNames(program).c_str());
3049  }
3050  default:
3052  }
3053  cl::sycl::detail::pi::die("Program info request not implemented");
3054  return {};
3055 }
3056 
3058  const pi_device *device_list, const char *options,
3059  pi_uint32 num_input_programs,
3060  const pi_program *input_programs,
3061  void (*pfn_notify)(pi_program program,
3062  void *user_data),
3063  void *user_data, pi_program *ret_program) {
3064  (void)context;
3065  (void)num_devices;
3066  (void)device_list;
3067  (void)options;
3068  (void)num_input_programs;
3069  (void)input_programs;
3070  (void)pfn_notify;
3071  (void)user_data;
3072  (void)ret_program;
3074  "hip_piProgramLink: linking not supported with hip backend");
3075  return {};
3076 }
3077 
3083  pi_program program, pi_uint32 num_devices, const pi_device *device_list,
3084  const char *options, pi_uint32 num_input_headers,
3085  const pi_program *input_headers, const char **header_include_names,
3086  void (*pfn_notify)(pi_program program, void *user_data), void *user_data) {
3087  (void)input_headers;
3088  (void)header_include_names;
3089 
3090  assert(program != nullptr);
3091  assert(num_devices == 1 || num_devices == 0);
3092  assert(device_list != nullptr || num_devices == 0);
3093  assert(pfn_notify == nullptr);
3094  assert(user_data == nullptr);
3095  assert(num_input_headers == 0);
3096  pi_result retError = PI_SUCCESS;
3097 
3098  try {
3099  ScopedContext active(program->get_context());
3100 
3101  program->build_program(options);
3102 
3103  } catch (pi_result err) {
3104  retError = err;
3105  }
3106  return retError;
3107 }
3108 
3110  pi_program_build_info param_name,
3111  size_t param_value_size, void *param_value,
3112  size_t *param_value_size_ret) {
3113  (void)device;
3114 
3115  assert(program != nullptr);
3116 
3117  switch (param_name) {
3119  return getInfo(param_value_size, param_value, param_value_size_ret,
3120  program->buildStatus_);
3121  }
3123  return getInfo(param_value_size, param_value, param_value_size_ret,
3124  program->buildOptions_.c_str());
3126  return getInfoArray(program->MAX_LOG_SIZE, param_value_size, param_value,
3127  param_value_size_ret, program->infoLog_);
3128  default:
3130  }
3131  cl::sycl::detail::pi::die("Program Build info request not implemented");
3132  return {};
3133 }
3134 
3136  assert(program != nullptr);
3137  assert(program->get_reference_count() > 0);
3138  program->increment_reference_count();
3139  return PI_SUCCESS;
3140 }
3141 
3146  assert(program != nullptr);
3147 
3148  // double delete or someone is messing with the ref count.
3149  // either way, cannot safely proceed.
3150  assert(program->get_reference_count() != 0 &&
3151  "Reference count overflow detected in hip_piProgramRelease.");
3152 
3153  // decrement ref count. If it is 0, delete the program.
3154  if (program->decrement_reference_count() == 0) {
3155 
3156  std::unique_ptr<_pi_program> program_ptr{program};
3157 
3158  pi_result result = PI_ERROR_INVALID_PROGRAM;
3159 
3160  try {
3161  ScopedContext active(program->get_context());
3162  auto hipModule = program->get();
3163  result = PI_CHECK_ERROR(hipModuleUnload(hipModule));
3164  } catch (...) {
3165  result = PI_ERROR_OUT_OF_RESOURCES;
3166  }
3167 
3168  return result;
3169  }
3170 
3171  return PI_SUCCESS;
3172 }
3173 
3181  pi_native_handle *nativeHandle) {
3182  *nativeHandle = reinterpret_cast<pi_native_handle>(program->get());
3183  return PI_SUCCESS;
3184 }
3185 
3199  bool ownNativeHandle,
3200  pi_program *program) {
3201  (void)nativeHandle;
3202  (void)context;
3203  (void)ownNativeHandle;
3204  (void)program;
3205 
3207  "Creation of PI program from native handle not implemented");
3208  return {};
3209 }
3210 
3212  size_t param_value_size, void *param_value,
3213  size_t *param_value_size_ret) {
3214 
3215  if (kernel != nullptr) {
3216 
3217  switch (param_name) {
3219  return getInfo(param_value_size, param_value, param_value_size_ret,
3220  kernel->get_name());
3222  return getInfo(param_value_size, param_value, param_value_size_ret,
3223  kernel->get_num_args());
3225  return getInfo(param_value_size, param_value, param_value_size_ret,
3226  kernel->get_reference_count());
3227  case PI_KERNEL_INFO_CONTEXT: {
3228  return getInfo(param_value_size, param_value, param_value_size_ret,
3229  kernel->get_context());
3230  }
3231  case PI_KERNEL_INFO_PROGRAM: {
3232  return getInfo(param_value_size, param_value, param_value_size_ret,
3233  kernel->get_program());
3234  }
3236  return getInfo(param_value_size, param_value, param_value_size_ret, "");
3237  }
3238  default: {
3240  }
3241  }
3242  }
3243 
3244  return PI_ERROR_INVALID_KERNEL;
3245 }
3246 
3248  pi_kernel_group_info param_name,
3249  size_t param_value_size, void *param_value,
3250  size_t *param_value_size_ret) {
3251 
3252  // here we want to query about a kernel's hip blocks!
3253 
3254  if (kernel != nullptr) {
3255 
3256  switch (param_name) {
3258  int max_threads = 0;
3260  hipFuncGetAttribute(&max_threads,
3261  HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
3262  kernel->get()) == hipSuccess);
3263  return getInfo(param_value_size, param_value, param_value_size_ret,
3264  size_t(max_threads));
3265  }
3267  // Returns the work-group size specified in the kernel source or IL.
3268  // If the work-group size is not specified in the kernel source or IL,
3269  // (0, 0, 0) is returned.
3270  // https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
3271 
3272  // TODO: can we extract the work group size from the PTX?
3273  size_t group_size[3] = {0, 0, 0};
3274  return getInfoArray(3, param_value_size, param_value,
3275  param_value_size_ret, group_size);
3276  }
3278  // OpenCL LOCAL == HIP SHARED
3279  int bytes = 0;
3281  hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
3282  kernel->get()) == hipSuccess);
3283  return getInfo(param_value_size, param_value, param_value_size_ret,
3284  pi_uint64(bytes));
3285  }
3287  // Work groups should be multiples of the warp size
3288  int warpSize = 0;
3290  hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
3291  device->get()) == hipSuccess);
3292  return getInfo(param_value_size, param_value, param_value_size_ret,
3293  static_cast<size_t>(warpSize));
3294  }
3296  // OpenCL PRIVATE == HIP LOCAL
3297  int bytes = 0;
3299  hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
3300  kernel->get()) == hipSuccess);
3301  return getInfo(param_value_size, param_value, param_value_size_ret,
3302  pi_uint64(bytes));
3303  }
3305  cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in "
3306  "piKernelGetGroupInfo not implemented\n");
3307  return {};
3308  }
3309 
3310  default:
3312  }
3313  }
3314 
3315  return PI_ERROR_INVALID_KERNEL;
3316 }
3317 
3320  size_t input_value_size, const void *input_value, size_t param_value_size,
3321  void *param_value, size_t *param_value_size_ret) {
3322  (void)input_value_size;
3323  (void)input_value;
3324 
3325  if (kernel != nullptr) {
3326  switch (param_name) {
3328  // Sub-group size is equivalent to warp size
3329  int warpSize = 0;
3331  hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
3332  device->get()) == hipSuccess);
3333  return getInfo(param_value_size, param_value, param_value_size_ret,
3334  static_cast<uint32_t>(warpSize));
3335  }
3337  // Number of sub-groups = max block size / warp size + possible remainder
3338  int max_threads = 0;
3340  hipFuncGetAttribute(&max_threads,
3341  HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
3342  kernel->get()) == hipSuccess);
3343  int warpSize = 0;
3345  0, nullptr, sizeof(uint32_t), &warpSize,
3346  nullptr);
3347  int maxWarps = (max_threads + warpSize - 1) / warpSize;
3348  return getInfo(param_value_size, param_value, param_value_size_ret,
3349  static_cast<uint32_t>(maxWarps));
3350  }
3352  // Return value of 0 => not specified
3353  // TODO: Revisit if PTX is generated for compile-time work-group sizes
3354  return getInfo(param_value_size, param_value, param_value_size_ret, 0);
3355  }
3357  // Return value of 0 => unspecified or "auto" sub-group size
3358  // Correct for now, since warp size may be read from special register
3359  // TODO: Return warp size once default is primary sub-group size
3360  // TODO: Revisit if we can recover [[sub_group_size]] attribute from PTX
3361  return getInfo(param_value_size, param_value, param_value_size_ret, 0);
3362  }
3363  default:
3365  }
3366  }
3367  return PI_ERROR_INVALID_KERNEL;
3368 }
3369 
3371  assert(kernel != nullptr);
3372  assert(kernel->get_reference_count() > 0u);
3373 
3374  kernel->increment_reference_count();
3375  return PI_SUCCESS;
3376 }
3377 
3379  assert(kernel != nullptr);
3380 
3381  // double delete or someone is messing with the ref count.
3382  // either way, cannot safely proceed.
3383  assert(kernel->get_reference_count() != 0 &&
3384  "Reference count overflow detected in hip_piKernelRelease.");
3385 
3386  // decrement ref count. If it is 0, delete the program.
3387  if (kernel->decrement_reference_count() == 0) {
3388  // no internal hip resources to clean up. Just delete it.
3389  delete kernel;
3390  return PI_SUCCESS;
3391  }
3392 
3393  return PI_SUCCESS;
3394 }
3395 
3396 // A NOP for the HIP backend
3398  pi_kernel_exec_info param_name,
3399  size_t param_value_size,
3400  const void *param_value) {
3401  (void)kernel;
3402  (void)param_name;
3403  (void)param_value_size;
3404  (void)param_value;
3405 
3406  return PI_SUCCESS;
3407 }
3408 
3410  size_t, const void *) {
3411  // This entry point is only used for native specialization constants (SPIR-V),
3412  // and the HIP plugin is AOT only so this entry point is not supported.
3414  "Native specialization constants are not supported");
3415  return {};
3416 }
3417 
3419  size_t arg_size, const void *arg_value) {
3420  kernel->set_kernel_arg(arg_index, arg_size, arg_value);
3421  return PI_SUCCESS;
3422 }
3423 
3424 //
3425 // Events
3426 //
3428  (void)context;
3429  (void)event;
3430 
3431  cl::sycl::detail::pi::die("PI Event Create not implemented in HIP backend");
3432 }
3433 
3435  size_t param_value_size, void *param_value,
3436  size_t *param_value_size_ret) {
3437  assert(event != nullptr);
3438 
3439  switch (param_name) {
3441  return getInfo(param_value_size, param_value, param_value_size_ret,
3442  event->get_queue());
3444  return getInfo(param_value_size, param_value, param_value_size_ret,
3445  event->get_command_type());
3447  return getInfo(param_value_size, param_value, param_value_size_ret,
3448  event->get_reference_count());
3450  return getInfo(param_value_size, param_value, param_value_size_ret,
3451  static_cast<pi_event_status>(event->get_execution_status()));
3452  }
3453  case PI_EVENT_INFO_CONTEXT:
3454  return getInfo(param_value_size, param_value, param_value_size_ret,
3455  event->get_context());
3456  default:
3458  }
3459 
3460  return PI_ERROR_INVALID_EVENT;
3461 }
3462 
3466  pi_profiling_info param_name,
3467  size_t param_value_size,
3468  void *param_value,
3469  size_t *param_value_size_ret) {
3470 
3471  assert(event != nullptr);
3472 
3473  pi_queue queue = event->get_queue();
3474  if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) {
3475  return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
3476  }
3477 
3478  switch (param_name) {
3481  return getInfo<pi_uint64>(param_value_size, param_value,
3482  param_value_size_ret, event->get_queued_time());
3484  return getInfo<pi_uint64>(param_value_size, param_value,
3485  param_value_size_ret, event->get_start_time());
3487  return getInfo<pi_uint64>(param_value_size, param_value,
3488  param_value_size_ret, event->get_end_time());
3489  default:
3491  }
3492  cl::sycl::detail::pi::die("Event Profiling info request not implemented");
3493  return {};
3494 }
3495 
3497  pi_int32 command_exec_callback_type,
3498  pfn_notify notify, void *user_data) {
3499  (void)event;
3500  (void)command_exec_callback_type;
3501  (void)notify;
3502  (void)user_data;
3503 
3504  cl::sycl::detail::pi::die("Event Callback not implemented in HIP backend");
3505  return PI_SUCCESS;
3506 }
3507 
3509  (void)event;
3510  (void)execution_status;
3511 
3512  cl::sycl::detail::pi::die("Event Set Status not implemented in HIP backend");
3513  return PI_ERROR_INVALID_VALUE;
3514 }
3515 
3517  assert(event != nullptr);
3518 
3519  const auto refCount = event->increment_reference_count();
3520 
3522  refCount != 0, "Reference count overflow detected in hip_piEventRetain.");
3523 
3524  return PI_SUCCESS;
3525 }
3526 
3528  assert(event != nullptr);
3529 
3530  // double delete or someone is messing with the ref count.
3531  // either way, cannot safely proceed.
3533  event->get_reference_count() != 0,
3534  "Reference count overflow detected in hip_piEventRelease.");
3535 
3536  // decrement ref count. If it is 0, delete the event.
3537  if (event->decrement_reference_count() == 0) {
3538  std::unique_ptr<_pi_event> event_ptr{event};
3539  pi_result result = PI_ERROR_INVALID_EVENT;
3540  try {
3541  ScopedContext active(event->get_context());
3542  result = event->release();
3543  } catch (...) {
3544  result = PI_ERROR_OUT_OF_RESOURCES;
3545  }
3546  return result;
3547  }
3548 
3549  return PI_SUCCESS;
3550 }
3551 
3560  pi_uint32 num_events_in_wait_list,
3561  const pi_event *event_wait_list,
3562  pi_event *event) {
3564  command_queue, num_events_in_wait_list, event_wait_list, event);
3565 }
3566 
3573  pi_uint32 num_events_in_wait_list,
3574  const pi_event *event_wait_list,
3575  pi_event *event) {
3576  if (!command_queue) {
3577  return PI_ERROR_INVALID_QUEUE;
3578  }
3579 
3580  try {
3581  ScopedContext active(command_queue->get_context());
3582 
3583  if (event_wait_list) {
3584  auto result =
3585  forLatestEvents(event_wait_list, num_events_in_wait_list,
3586  [command_queue](pi_event event) -> pi_result {
3587  return enqueueEventWait(command_queue, event);
3588  });
3589 
3590  if (result != PI_SUCCESS) {
3591  return result;
3592  }
3593  }
3594 
3595  if (event) {
3596  *event = _pi_event::make_native(PI_COMMAND_TYPE_MARKER, command_queue);
3597  (*event)->start();
3598  (*event)->record();
3599  }
3600 
3601  return PI_SUCCESS;
3602  } catch (pi_result err) {
3603  return err;
3604  } catch (...) {
3605  return PI_ERROR_UNKNOWN;
3606  }
3607 }
3608 
3616  pi_native_handle *nativeHandle) {
3617  *nativeHandle = reinterpret_cast<pi_native_handle>(event->get());
3618  return PI_SUCCESS;
3619 }
3620 
3631  bool ownNativeHandle,
3632  pi_event *event) {
3633  (void)nativeHandle;
3634  (void)context;
3635  (void)ownNativeHandle;
3636  (void)event;
3637 
3639  "Creation of PI event from native handle not implemented");
3640  return {};
3641 }
3642 
3653  const pi_sampler_properties *sampler_properties,
3654  pi_sampler *result_sampler) {
3655  std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)};
3656 
3657  bool propSeen[3] = {false, false, false};
3658  for (size_t i = 0; sampler_properties[i] != 0; i += 2) {
3659  switch (sampler_properties[i]) {
3661  if (propSeen[0]) {
3662  return PI_ERROR_INVALID_VALUE;
3663  }
3664  propSeen[0] = true;
3665  retImplSampl->props_ |= sampler_properties[i + 1];
3666  break;
3668  if (propSeen[1]) {
3669  return PI_ERROR_INVALID_VALUE;
3670  }
3671  propSeen[1] = true;
3672  retImplSampl->props_ |=
3673  (sampler_properties[i + 1] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1;
3674  break;
3676  if (propSeen[2]) {
3677  return PI_ERROR_INVALID_VALUE;
3678  }
3679  propSeen[2] = true;
3680  retImplSampl->props_ |=
3681  (sampler_properties[i + 1] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2;
3682  break;
3683  default:
3684  return PI_ERROR_INVALID_VALUE;
3685  }
3686  }
3687 
3688  if (!propSeen[0]) {
3689  retImplSampl->props_ |= PI_TRUE;
3690  }
3691  // Default filter mode to CL_FILTER_NEAREST
3692  if (!propSeen[2]) {
3693  retImplSampl->props_ |=
3695  << 2;
3696  }
3697 
3698  *result_sampler = retImplSampl.release();
3699  return PI_SUCCESS;
3700 }
3701 
3712  size_t param_value_size, void *param_value,
3713  size_t *param_value_size_ret) {
3714  assert(sampler != nullptr);
3715 
3716  switch (param_name) {
3718  return getInfo(param_value_size, param_value, param_value_size_ret,
3719  sampler->get_reference_count());
3721  return getInfo(param_value_size, param_value, param_value_size_ret,
3722  sampler->context_);
3724  pi_bool norm_coords_prop = static_cast<pi_bool>(sampler->props_ & 0x1);
3725  return getInfo(param_value_size, param_value, param_value_size_ret,
3726  norm_coords_prop);
3727  }
3729  pi_sampler_filter_mode filter_prop = static_cast<pi_sampler_filter_mode>(
3730  ((sampler->props_ >> 1) & 0x1) + PI_SAMPLER_FILTER_MODE_NEAREST);
3731  return getInfo(param_value_size, param_value, param_value_size_ret,
3732  filter_prop);
3733  }
3735  pi_sampler_addressing_mode addressing_prop =
3736  static_cast<pi_sampler_addressing_mode>(
3737  (sampler->props_ >> 2) + PI_SAMPLER_ADDRESSING_MODE_NONE);
3738  return getInfo(param_value_size, param_value, param_value_size_ret,
3739  addressing_prop);
3740  }
3741  default:
3743  }
3744  return {};
3745 }
3746 
3753  assert(sampler != nullptr);
3754  sampler->increment_reference_count();
3755  return PI_SUCCESS;
3756 }
3757 
3765  assert(sampler != nullptr);
3766 
3767  // double delete or someone is messing with the ref count.
3768  // either way, cannot safely proceed.
3770  sampler->get_reference_count() != 0,
3771  "Reference count overflow detected in hip_piSamplerRelease.");
3772 
3773  // decrement ref count. If it is 0, delete the sampler.
3774  if (sampler->decrement_reference_count() == 0) {
3775  delete sampler;
3776  }
3777 
3778  return PI_SUCCESS;
3779 }
3780 
3787  hipStream_t hip_stream, pi_buff_rect_region region, const void *src_ptr,
3788  const hipMemoryType src_type, pi_buff_rect_offset src_offset,
3789  size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr,
3790  const hipMemoryType dst_type, pi_buff_rect_offset dst_offset,
3791  size_t dst_row_pitch, size_t dst_slice_pitch) {
3792 
3793  assert(region != nullptr);
3794  assert(src_offset != nullptr);
3795  assert(dst_offset != nullptr);
3796 
3797  assert(src_type == hipMemoryTypeDevice || src_type == hipMemoryTypeHost);
3798  assert(dst_type == hipMemoryTypeDevice || dst_type == hipMemoryTypeHost);
3799 
3800  src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch;
3801  src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch)
3802  : src_slice_pitch;
3803  dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch;
3804  dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch)
3805  : dst_slice_pitch;
3806 
3807  HIP_MEMCPY3D params;
3808 
3809  params.WidthInBytes = region->width_bytes;
3810  params.Height = region->height_scalar;
3811  params.Depth = region->depth_scalar;
3812 
3813  params.srcMemoryType = src_type;
3814  params.srcDevice = src_type == hipMemoryTypeDevice
3815  ? *static_cast<const hipDeviceptr_t *>(src_ptr)
3816  : 0;
3817  params.srcHost = src_type == hipMemoryTypeHost ? src_ptr : nullptr;
3818  params.srcXInBytes = src_offset->x_bytes;
3819  params.srcY = src_offset->y_scalar;
3820  params.srcZ = src_offset->z_scalar;
3821  params.srcPitch = src_row_pitch;
3822  params.srcHeight = src_slice_pitch / src_row_pitch;
3823 
3824  params.dstMemoryType = dst_type;
3825  params.dstDevice = dst_type == hipMemoryTypeDevice
3826  ? *reinterpret_cast<hipDeviceptr_t *>(dst_ptr)
3827  : 0;
3828  params.dstHost = dst_type == hipMemoryTypeHost ? dst_ptr : nullptr;
3829  params.dstXInBytes = dst_offset->x_bytes;
3830  params.dstY = dst_offset->y_scalar;
3831  params.dstZ = dst_offset->z_scalar;
3832  params.dstPitch = dst_row_pitch;
3833  params.dstHeight = dst_slice_pitch / dst_row_pitch;
3834 
3835  return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(&params, hip_stream));
3836 
3837  return PI_SUCCESS;
3838 }
3839 
3841  pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
3842  pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
3843  pi_buff_rect_region region, size_t buffer_row_pitch,
3844  size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
3845  void *ptr, pi_uint32 num_events_in_wait_list,
3846  const pi_event *event_wait_list, pi_event *event) {
3847 
3848  assert(buffer != nullptr);
3849  assert(command_queue != nullptr);
3850 
3851  pi_result retErr = PI_SUCCESS;
3852  hipStream_t hipStream = command_queue->get();
3853  void *devPtr = buffer->mem_.buffer_mem_.get_void();
3854  std::unique_ptr<_pi_event> retImplEv{nullptr};
3855 
3856  try {
3857  ScopedContext active(command_queue->get_context());
3858 
3859  retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
3860  event_wait_list, nullptr);
3861 
3862  if (event) {
3863  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3864  PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT, command_queue));
3865  retImplEv->start();
3866  }
3867 
3869  hipStream, region, &devPtr, hipMemoryTypeDevice, buffer_offset,
3870  buffer_row_pitch, buffer_slice_pitch, ptr, hipMemoryTypeHost,
3871  host_offset, host_row_pitch, host_slice_pitch);
3872 
3873  if (event) {
3874  retErr = retImplEv->record();
3875  }
3876 
3877  if (blocking_read) {
3878  retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
3879  }
3880 
3881  if (event) {
3882  *event = retImplEv.release();
3883  }
3884 
3885  } catch (pi_result err) {
3886  retErr = err;
3887  }
3888  return retErr;
3889 }
3890 
3892  pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
3893  pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
3894  pi_buff_rect_region region, size_t buffer_row_pitch,
3895  size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
3896  const void *ptr, pi_uint32 num_events_in_wait_list,
3897  const pi_event *event_wait_list, pi_event *event) {
3898 
3899  assert(buffer != nullptr);
3900  assert(command_queue != nullptr);
3901 
3902  pi_result retErr = PI_SUCCESS;
3903  hipStream_t hipStream = command_queue->get();
3904  void *devPtr = buffer->mem_.buffer_mem_.get_void();
3905  std::unique_ptr<_pi_event> retImplEv{nullptr};
3906 
3907  try {
3908  ScopedContext active(command_queue->get_context());
3909 
3910  retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
3911  event_wait_list, nullptr);
3912 
3913  if (event) {
3914  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3915  PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT, command_queue));
3916  retImplEv->start();
3917  }
3918 
3920  hipStream, region, ptr, hipMemoryTypeHost, host_offset, host_row_pitch,
3921  host_slice_pitch, &devPtr, hipMemoryTypeDevice, buffer_offset,
3922  buffer_row_pitch, buffer_slice_pitch);
3923 
3924  if (event) {
3925  retErr = retImplEv->record();
3926  }
3927 
3928  if (blocking_write) {
3929  retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
3930  }
3931 
3932  if (event) {
3933  *event = retImplEv.release();
3934  }
3935 
3936  } catch (pi_result err) {
3937  retErr = err;
3938  }
3939  return retErr;
3940 }
3941 
3943  pi_mem dst_buffer, size_t src_offset,
3944  size_t dst_offset, size_t size,
3945  pi_uint32 num_events_in_wait_list,
3946  const pi_event *event_wait_list,
3947  pi_event *event) {
3948  if (!command_queue) {
3949  return PI_ERROR_INVALID_QUEUE;
3950  }
3951 
3952  std::unique_ptr<_pi_event> retImplEv{nullptr};
3953 
3954  try {
3955  ScopedContext active(command_queue->get_context());
3956 
3957  if (event_wait_list) {
3958  hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
3959  event_wait_list, nullptr);
3960  }
3961 
3962  pi_result result;
3963 
3964  if (event) {
3965  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
3966  PI_COMMAND_TYPE_MEM_BUFFER_COPY, command_queue));
3967  result = retImplEv->start();
3968  }
3969 
3970  auto stream = command_queue->get();
3971  auto src = src_buffer->mem_.buffer_mem_.get_with_offset(src_offset);
3972  auto dst = dst_buffer->mem_.buffer_mem_.get_with_offset(dst_offset);
3973 
3974  result = PI_CHECK_ERROR(hipMemcpyDtoDAsync(dst, src, size, stream));
3975 
3976  if (event) {
3977  result = retImplEv->record();
3978  *event = retImplEv.release();
3979  }
3980 
3981  return result;
3982  } catch (pi_result err) {
3983  return err;
3984  } catch (...) {
3985  return PI_ERROR_UNKNOWN;
3986  }
3987 }
3988 
3990  pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
3991  pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
3992  pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
3993  size_t dst_row_pitch, size_t dst_slice_pitch,
3994  pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3995  pi_event *event) {
3996 
3997  assert(src_buffer != nullptr);
3998  assert(dst_buffer != nullptr);
3999  assert(command_queue != nullptr);
4000 
4001  pi_result retErr = PI_SUCCESS;
4002  hipStream_t hipStream = command_queue->get();
4003  void *srcPtr = src_buffer->mem_.buffer_mem_.get_void();
4004  void *dstPtr = dst_buffer->mem_.buffer_mem_.get_void();
4005  std::unique_ptr<_pi_event> retImplEv{nullptr};
4006 
4007  try {
4008  ScopedContext active(command_queue->get_context());
4009 
4010  retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4011  event_wait_list, nullptr);
4012 
4013  if (event) {
4014  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
4015  PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT, command_queue));
4016  retImplEv->start();
4017  }
4018 
4020  hipStream, region, &srcPtr, hipMemoryTypeDevice, src_origin,
4021  src_row_pitch, src_slice_pitch, &dstPtr, hipMemoryTypeDevice,
4022  dst_origin, dst_row_pitch, dst_slice_pitch);
4023 
4024  if (event) {
4025  retImplEv->record();
4026  *event = retImplEv.release();
4027  }
4028 
4029  } catch (pi_result err) {
4030  retErr = err;
4031  }
4032  return retErr;
4033 }
4034 
4036  const void *pattern, size_t pattern_size,
4037  size_t offset, size_t size,
4038  pi_uint32 num_events_in_wait_list,
4039  const pi_event *event_wait_list,
4040  pi_event *event) {
4041  assert(command_queue != nullptr);
4042 
4043  auto args_are_multiples_of_pattern_size =
4044  (offset % pattern_size == 0) || (size % pattern_size == 0);
4045 
4046  auto pattern_is_valid = (pattern != nullptr);
4047 
4048  auto pattern_size_is_valid =
4049  ((pattern_size & (pattern_size - 1)) == 0) && // is power of two
4050  (pattern_size > 0) && (pattern_size <= 128); // falls within valid range
4051 
4052  assert(args_are_multiples_of_pattern_size && pattern_is_valid &&
4053  pattern_size_is_valid);
4054  (void)args_are_multiples_of_pattern_size;
4055  (void)pattern_is_valid;
4056  (void)pattern_size_is_valid;
4057 
4058  std::unique_ptr<_pi_event> retImplEv{nullptr};
4059 
4060  try {
4061  ScopedContext active(command_queue->get_context());
4062 
4063  if (event_wait_list) {
4064  hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4065  event_wait_list, nullptr);
4066  }
4067 
4068  pi_result result;
4069 
4070  if (event) {
4071  retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
4072  PI_COMMAND_TYPE_MEM_BUFFER_FILL, command_queue));
4073  result = retImplEv->start();
4074  }
4075 
4076  auto dstDevice = buffer->mem_.buffer_mem_.get_with_offset(offset);
4077  auto stream = command_queue->get();
4078  auto N = size / pattern_size;
4079 
4080  // pattern size in bytes
4081  switch (pattern_size) {
4082  case 1: {
4083  auto value = *static_cast<const uint8_t *>(pattern);
4084  result = PI_CHECK_ERROR(hipMemsetD8Async(dstDevice, value, N, stream));
4085  break;
4086  }
4087  case 2: {
4088  auto value = *static_cast<const uint16_t *>(pattern);
4089  result = PI_CHECK_ERROR(hipMemsetD16Async(dstDevice, value, N, stream));
4090  break;
4091  }
4092  case 4: {
4093  auto value = *static_cast<const uint32_t *>(pattern);
4094  result = PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, N, stream));
4095  break;
4096  }
4097 
4098  default: {
4099  // HIP has no memset functions that allow setting values more than 4
4100  // bytes. PI API lets you pass an arbitrary "pattern" to the buffer
4101  // fill, which can be more than 4 bytes. We must break up the pattern
4102  // into 1 byte values, and set the buffer using multiple strided calls.
4103  // The first 4 patterns are set using hipMemsetD32Async then all
4104  // subsequent 1 byte patterns are set using hipMemset2DAsync which is
4105  // called for each pattern.
4106 
4107  // Calculate the number of patterns, stride, number of times the pattern
4108  // needs to be applied, and the number of times the first 32 bit pattern
4109  // needs to be applied.
4110  auto number_of_steps = pattern_size / sizeof(uint8_t);
4111  auto pitch = number_of_steps * sizeof(uint8_t);
4112  auto height = size / number_of_steps;
4113  auto count_32 = size / sizeof(uint32_t);
4114 
4115  // Get 4-byte chunk of the pattern and call hipMemsetD32Async
4116  auto value = *(static_cast<const uint32_t *>(pattern));
4117  result =
4118  PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, count_32, stream));
4119  for (auto step = 4u; step < number_of_steps; ++step) {
4120  // take 1 byte of the pattern
4121  value = *(static_cast<const uint8_t *>(pattern) + step);
4122 
4123  // offset the pointer to the part of the buffer we want to write to
4124  auto offset_ptr = reinterpret_cast<void *>(
4125  reinterpret_cast<uint8_t *>(dstDevice) + (step * sizeof(uint8_t)));
4126 
4127  // set all of the pattern chunks
4128  result = PI_CHECK_ERROR(hipMemset2DAsync(
4129  offset_ptr, pitch, value, sizeof(uint8_t), height, stream));
4130  }
4131  break;
4132  }
4133  }
4134 
4135  if (event) {
4136  result = retImplEv->record();
4137  *event = retImplEv.release();
4138  }
4139 
4140  return result;
4141  } catch (pi_result err) {
4142  return err;
4143  } catch (...) {
4144  return PI_ERROR_UNKNOWN;
4145  }
4146 }
4147 
4148 static size_t imageElementByteSize(hipArray_Format array_format) {
4149  switch (array_format) {
4150  case HIP_AD_FORMAT_UNSIGNED_INT8:
4151  case HIP_AD_FORMAT_SIGNED_INT8:
4152  return 1;
4153  case HIP_AD_FORMAT_UNSIGNED_INT16:
4154  case HIP_AD_FORMAT_SIGNED_INT16:
4155  case HIP_AD_FORMAT_HALF:
4156  return 2;
4157  case HIP_AD_FORMAT_UNSIGNED_INT32:
4158  case HIP_AD_FORMAT_SIGNED_INT32:
4159  case HIP_AD_FORMAT_FLOAT:
4160  return 4;
4161  default:
4162  return 0;
4163  }
4164  cl::sycl::detail::pi::die("Invalid iamge format.");
4165  return 0;
4166 }
4167 
4173 
4175  hipStream_t hip_stream, pi_mem_type img_type, const size_t *region,
4176  const void *src_ptr, const hipMemoryType src_type, const size_t *src_offset,
4177  void *dst_ptr, const hipMemoryType dst_type, const size_t *dst_offset) {
4178  assert(region != nullptr);
4179 
4180  assert(src_type == hipMemoryTypeArray || src_type == hipMemoryTypeHost);
4181  assert(dst_type == hipMemoryTypeArray || dst_type == hipMemoryTypeHost);
4182 
4183  if (img_type == PI_MEM_TYPE_IMAGE2D) {
4184  hip_Memcpy2D cpyDesc;
4185  memset(&cpyDesc, 0, sizeof(cpyDesc));
4186  cpyDesc.srcMemoryType = src_type;
4187  if (src_type == hipMemoryTypeArray) {
4188  cpyDesc.srcArray =
4189  reinterpret_cast<hipCUarray>(const_cast<void *>(src_ptr));
4190  cpyDesc.srcXInBytes = src_offset[0];
4191  cpyDesc.srcY = src_offset[1];
4192  } else {
4193  cpyDesc.srcHost = src_ptr;
4194  }
4195  cpyDesc.dstMemoryType = dst_type;
4196  if (dst_type == hipMemoryTypeArray) {
4197  cpyDesc.dstArray =
4198  reinterpret_cast<hipCUarray>(const_cast<void *>(dst_ptr));
4199  cpyDesc.dstXInBytes = dst_offset[0];
4200  cpyDesc.dstY = dst_offset[1];
4201  } else {
4202  cpyDesc.dstHost = dst_ptr;
4203  }
4204  cpyDesc.WidthInBytes = region[0];
4205  cpyDesc.Height = region[1];
4206  return PI_CHECK_ERROR(hipMemcpyParam2DAsync(&cpyDesc, hip_stream));
4207  }
4208 
4209  if (img_type == PI_MEM_TYPE_IMAGE3D) {
4210 
4211  HIP_MEMCPY3D cpyDesc;
4212  memset(&cpyDesc, 0, sizeof(cpyDesc));
4213  cpyDesc.srcMemoryType = src_type;
4214  if (src_type == hipMemoryTypeArray) {
4215  cpyDesc.srcArray =
4216  reinterpret_cast<hipCUarray>(const_cast<void *>(src_ptr));
4217  cpyDesc.srcXInBytes = src_offset[0];
4218  cpyDesc.srcY = src_offset[1];
4219  cpyDesc.srcZ = src_offset[2];
4220  } else {
4221  cpyDesc.srcHost = src_ptr;
4222  }
4223  cpyDesc.dstMemoryType = dst_type;
4224  if (dst_type == hipMemoryTypeArray) {
4225  cpyDesc.dstArray = reinterpret_cast<hipCUarray>(dst_ptr);
4226  cpyDesc.dstXInBytes = dst_offset[0];
4227  cpyDesc.dstY = dst_offset[1];
4228  cpyDesc.dstZ = dst_offset[2];
4229  } else {
4230  cpyDesc.dstHost = dst_ptr;
4231  }
4232  cpyDesc.WidthInBytes = region[0];
4233  cpyDesc.Height = region[1];
4234  cpyDesc.Depth = region[2];
4235  return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(&cpyDesc, hip_stream));
4236  return PI_ERROR_UNKNOWN;
4237  }
4238 
4239  return PI_ERROR_INVALID_VALUE;
4240 }
4241 
4243  pi_bool blocking_read, const size_t *origin,
4244  const size_t *region, size_t row_pitch,
4245  size_t slice_pitch, void *ptr,
4246  pi_uint32 num_events_in_wait_list,
4247  const pi_event *event_wait_list,
4248  pi_event *event) {
4249  (void)row_pitch;
4250  (void)slice_pitch;
4251 
4252  assert(command_queue != nullptr);
4253  assert(image != nullptr);
4254  assert(image->mem_type_ == _pi_mem::mem_type::surface);
4255 
4256  pi_result retErr = PI_SUCCESS;
4257  hipStream_t hipStream = command_queue->get();
4258 
4259  try {
4260  ScopedContext active(command_queue->get_context());
4261 
4262  if (event_wait_list) {
4263  hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4264  event_wait_list, nullptr);
4265  }
4266 
4267  hipArray *array = image->mem_.surface_mem_.get_array();
4268 
4269  hipArray_Format Format;
4270  size_t NumChannels;
4271  getArrayDesc(array, Format, NumChannels);
4272 
4273  int elementByteSize = imageElementByteSize(Format);
4274 
4275  size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
4276  size_t bytesToCopy = elementByteSize * NumChannels * region[0];
4277 
4278  pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
4279 
4280  size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4281  size_t srcOffset[3] = {byteOffsetX, origin[1], origin[2]};
4282 
4283  retErr = commonEnqueueMemImageNDCopy(hipStream, imgType, adjustedRegion,
4284  array, hipMemoryTypeArray, srcOffset,
4285  ptr, hipMemoryTypeHost, nullptr);
4286 
4287  if (retErr != PI_SUCCESS) {
4288  return retErr;
4289  }
4290 
4291  if (event) {
4292  auto new_event =
4294  new_event->record();
4295  *event = new_event;
4296  }
4297 
4298  if (blocking_read) {
4299  retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
4300  }
4301  } catch (pi_result err) {
4302  return err;
4303  } catch (...) {
4304  return PI_ERROR_UNKNOWN;
4305  }
4306  return PI_SUCCESS;
4307  return retErr;
4308 }
4309 
4311  pi_bool blocking_write,
4312  const size_t *origin, const size_t *region,
4313  size_t input_row_pitch,
4314  size_t input_slice_pitch, const void *ptr,
4315  pi_uint32 num_events_in_wait_list,
4316  const pi_event *event_wait_list,
4317  pi_event *event) {
4318  (void)blocking_write;
4319  (void)input_row_pitch;
4320  (void)input_slice_pitch;
4321  assert(command_queue != nullptr);
4322  assert(image != nullptr);
4323  assert(image->mem_type_ == _pi_mem::mem_type::surface);
4324 
4325  pi_result retErr = PI_SUCCESS;
4326  hipStream_t hipStream = command_queue->get();
4327 
4328  try {
4329  ScopedContext active(command_queue->get_context());
4330 
4331  if (event_wait_list) {
4332  hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4333  event_wait_list, nullptr);
4334  }
4335 
4336  hipArray *array = image->mem_.surface_mem_.get_array();
4337 
4338  hipArray_Format Format;
4339  size_t NumChannels;
4340  getArrayDesc(array, Format, NumChannels);
4341 
4342  int elementByteSize = imageElementByteSize(Format);
4343 
4344  size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
4345  size_t bytesToCopy = elementByteSize * NumChannels * region[0];
4346 
4347  pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
4348 
4349  size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4350  size_t dstOffset[3] = {byteOffsetX, origin[1], origin[2]};
4351 
4352  retErr = commonEnqueueMemImageNDCopy(hipStream, imgType, adjustedRegion,
4353  ptr, hipMemoryTypeHost, nullptr, array,
4354  hipMemoryTypeArray, dstOffset);
4355 
4356  if (retErr != PI_SUCCESS) {
4357  return retErr;
4358  }
4359 
4360  if (event) {
4361  auto new_event =
4363  new_event->record();
4364  *event = new_event;
4365  }
4366  } catch (pi_result err) {
4367  return err;
4368  } catch (...) {
4369  return PI_ERROR_UNKNOWN;
4370  }
4371 
4372  return PI_SUCCESS;
4373 
4374  return retErr;
4375 }
4376 
4378  pi_mem dst_image, const size_t *src_origin,
4379  const size_t *dst_origin,
4380  const size_t *region,
4381  pi_uint32 num_events_in_wait_list,
4382  const pi_event *event_wait_list,
4383  pi_event *event) {
4384 
4385  assert(src_image->mem_type_ == _pi_mem::mem_type::surface);
4386  assert(dst_image->mem_type_ == _pi_mem::mem_type::surface);
4387  assert(src_image->mem_.surface_mem_.get_image_type() ==
4388  dst_image->mem_.surface_mem_.get_image_type());
4389 
4390  pi_result retErr = PI_SUCCESS;
4391  hipStream_t hipStream = command_queue->get();
4392 
4393  try {
4394  ScopedContext active(command_queue->get_context());
4395 
4396  if (event_wait_list) {
4397  hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4398  event_wait_list, nullptr);
4399  }
4400 
4401  hipArray *srcArray = src_image->mem_.surface_mem_.get_array();
4402  hipArray_Format srcFormat;
4403  size_t srcNumChannels;
4404  getArrayDesc(srcArray, srcFormat, srcNumChannels);
4405 
4406  hipArray *dstArray = dst_image->mem_.surface_mem_.get_array();
4407  hipArray_Format dstFormat;
4408  size_t dstNumChannels;
4409  getArrayDesc(dstArray, dstFormat, dstNumChannels);
4410 
4411  assert(srcFormat == dstFormat);
4412  assert(srcNumChannels == dstNumChannels);
4413 
4414  int elementByteSize = imageElementByteSize(srcFormat);
4415 
4416  size_t dstByteOffsetX = dst_origin[0] * elementByteSize * srcNumChannels;
4417  size_t srcByteOffsetX = src_origin[0] * elementByteSize * dstNumChannels;
4418  size_t bytesToCopy = elementByteSize * srcNumChannels * region[0];
4419 
4420  pi_mem_type imgType = src_image->mem_.surface_mem_.get_image_type();
4421 
4422  size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4423  size_t srcOffset[3] = {srcByteOffsetX, src_origin[1], src_origin[2]};
4424  size_t dstOffset[3] = {dstByteOffsetX, dst_origin[1], dst_origin[2]};
4425 
4426  retErr = commonEnqueueMemImageNDCopy(
4427  hipStream, imgType, adjustedRegion, srcArray, hipMemoryTypeArray,
4428  srcOffset, dstArray, hipMemoryTypeArray, dstOffset);
4429 
4430  if (retErr != PI_SUCCESS) {
4431  return retErr;
4432  }
4433 
4434  if (event) {
4435  auto new_event =
4437  new_event->record();
4438  *event = new_event;
4439  }
4440  } catch (pi_result err) {
4441  return err;
4442  } catch (...) {
4443  return PI_ERROR_UNKNOWN;
4444  }
4445 
4446  return PI_SUCCESS;
4447  return retErr;
4448 }
4449 
4452  const void *fill_color,
4453  const size_t *origin, const size_t *region,
4454  pi_uint32 num_events_in_wait_list,
4455  const pi_event *event_wait_list,
4456  pi_event *event) {
4457  (void)command_queue;
4458  (void)image;
4459  (void)fill_color;
4460  (void)origin;
4461  (void)region;
4462  (void)num_events_in_wait_list;
4463  (void)event_wait_list;
4464  (void)event;
4465 
4466  cl::sycl::detail::pi::die("hip_piEnqueueMemImageFill not implemented");
4467  return {};
4468 }
4469 
4476  pi_bool blocking_map,
4477  pi_map_flags map_flags, size_t offset,
4478  size_t size,
4479  pi_uint32 num_events_in_wait_list,
4480  const pi_event *event_wait_list,
4481  pi_event *event, void **ret_map) {
4482  assert(ret_map != nullptr);
4483  assert(command_queue != nullptr);
4484  assert(buffer != nullptr);
4485  assert(buffer->mem_type_ == _pi_mem::mem_type::buffer);
4486 
4487  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
4488  const bool is_pinned = buffer->mem_.buffer_mem_.allocMode_ ==
4490 
4491  // Currently no support for overlapping regions
4492  if (buffer->mem_.buffer_mem_.get_map_ptr() != nullptr) {
4493  return ret_err;
4494  }
4495 
4496  // Allocate a pointer in the host to store the mapped information
4497  auto hostPtr = buffer->mem_.buffer_mem_.map_to_ptr(offset, map_flags);
4498  *ret_map = buffer->mem_.buffer_mem_.get_map_ptr();
4499  if (hostPtr) {
4500  ret_err = PI_SUCCESS;
4501  }
4502 
4503  if (!is_pinned && ((map_flags & PI_MAP_READ) || (map_flags & PI_MAP_WRITE))) {
4504  // Pinned host memory is already on host so it doesn't need to be read.
4505  ret_err = hip_piEnqueueMemBufferRead(
4506  command_queue, buffer, blocking_map, offset, size, hostPtr,
4507  num_events_in_wait_list, event_wait_list, event);
4508  } else {
4509  ScopedContext active(command_queue->get_context());
4510 
4511  if (is_pinned) {
4512  ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4513  event_wait_list, nullptr);
4514  }
4515 
4516  if (event) {
4517  try {
4519  command_queue);
4520  (*event)->start();
4521  (*event)->record();
4522  } catch (pi_result error) {
4523  ret_err = error;
4524  }
4525  }
4526  }
4527 
4528  return ret_err;
4529 }
4530 
4536  void *mapped_ptr,
4537  pi_uint32 num_events_in_wait_list,
4538  const pi_event *event_wait_list,
4539  pi_event *event) {
4540  pi_result ret_err = PI_SUCCESS;
4541 
4542  assert(command_queue != nullptr);
4543  assert(mapped_ptr != nullptr);
4544  assert(memobj != nullptr);
4545  assert(memobj->mem_type_ == _pi_mem::mem_type::buffer);
4546  assert(memobj->mem_.buffer_mem_.get_map_ptr() != nullptr);
4547  assert(memobj->mem_.buffer_mem_.get_map_ptr() == mapped_ptr);
4548 
4549  const bool is_pinned = memobj->mem_.buffer_mem_.allocMode_ ==
4551 
4552  if (!is_pinned &&
4553  ((memobj->mem_.buffer_mem_.get_map_flags() & PI_MAP_WRITE) ||
4554  (memobj->mem_.buffer_mem_.get_map_flags() &
4556  // Pinned host memory is only on host so it doesn't need to be written to.
4557  ret_err = hip_piEnqueueMemBufferWrite(
4558  command_queue, memobj, true,
4559  memobj->mem_.buffer_mem_.get_map_offset(mapped_ptr),
4560  memobj->mem_.buffer_mem_.get_size(), mapped_ptr,
4561  num_events_in_wait_list, event_wait_list, event);
4562  } else {
4563  ScopedContext active(command_queue->get_context());
4564 
4565  if (is_pinned) {
4566  ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
4567  event_wait_list, nullptr);
4568  }
4569 
4570  if (event) {
4571  try {
4573  command_queue);
4574  (*event)->start();
4575  (*event)->record();
4576  } catch (pi_result error) {
4577  ret_err = error;
4578  }
4579  }
4580  }
4581 
4582  memobj->mem_.buffer_mem_.unmap(mapped_ptr);
4583  return ret_err;
4584 }
4585 
4589  pi_usm_mem_properties *properties, size_t size,
4590  pi_uint32 alignment) {
4591  assert(result_ptr != nullptr);
4592  assert(context != nullptr);
4593  assert(properties == nullptr || *properties == 0);
4594  pi_result result = PI_SUCCESS;
4595  try {
4596  ScopedContext active(context);
4597  result = PI_CHECK_ERROR(hipHostMalloc(result_ptr, size));
4598  } catch (pi_result error) {
4599  result = error;
4600  }
4601 
4602  assert(alignment == 0 ||
4603  (result == PI_SUCCESS &&
4604  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
4605  return result;
4606 }
4607 
4611  pi_device device,
4612  pi_usm_mem_properties *properties,
4613  size_t size, pi_uint32 alignment) {
4614  assert(result_ptr != nullptr);
4615  assert(context != nullptr);
4616  assert(device != nullptr);
4617  assert(properties == nullptr || *properties == 0);
4618  pi_result result = PI_SUCCESS;
4619  try {
4620  ScopedContext active(context);
4621  result = PI_CHECK_ERROR(hipMalloc(result_ptr, size));
4622  } catch (pi_result error) {
4623  result = error;
4624  }
4625 
4626  assert(alignment == 0 ||
4627  (result == PI_SUCCESS &&
4628  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
4629  return result;
4630 }
4631 
4635  pi_device device,
4636  pi_usm_mem_properties *properties,
4637  size_t size, pi_uint32 alignment) {
4638  assert(result_ptr != nullptr);
4639  assert(context != nullptr);
4640  assert(device != nullptr);
4641  assert(properties == nullptr || *properties == 0);
4642  pi_result result = PI_SUCCESS;
4643  try {
4644  ScopedContext active(context);
4645  result =
4646  PI_CHECK_ERROR(hipMallocManaged(result_ptr, size, hipMemAttachGlobal));
4647  } catch (pi_result error) {
4648  result = error;
4649  }
4650 
4651  assert(alignment == 0 ||
4652  (result == PI_SUCCESS &&
4653  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
4654  return result;
4655 }
4656 
4660 
4661  assert(context != nullptr);
4662  pi_result result = PI_SUCCESS;
4663  try {
4664  ScopedContext active(context);
4665  unsigned int type;
4666  hipPointerAttribute_t hipPointerAttributeType;
4667  result =
4668  PI_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4669  type = hipPointerAttributeType.memoryType;
4670  assert(type == hipMemoryTypeDevice or type == hipMemoryTypeHost);
4671  if (type == hipMemoryTypeDevice) {
4672  result = PI_CHECK_ERROR(hipFree(ptr));
4673  }
4674  if (type == hipMemoryTypeHost) {
4675  result = PI_CHECK_ERROR(hipFreeHost(ptr));
4676  }
4677  } catch (pi_result error) {
4678  result = error;
4679  }
4680  return result;
4681 }
4682 
4684  size_t count,
4685  pi_uint32 num_events_in_waitlist,
4686  const pi_event *events_waitlist,
4687  pi_event *event) {
4688 
4689  assert(queue != nullptr);
4690  assert(ptr != nullptr);
4691  hipStream_t hipStream = queue->get();
4692  pi_result result = PI_SUCCESS;
4693  std::unique_ptr<_pi_event> event_ptr{nullptr};
4694 
4695  try {
4696  ScopedContext active(queue->get_context());
4697  result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist,
4698  events_waitlist, nullptr);
4699  if (event) {
4700  event_ptr = std::unique_ptr<_pi_event>(
4702  event_ptr->start();
4703  }
4704  result = PI_CHECK_ERROR(
4705  hipMemsetD8Async(reinterpret_cast<hipDeviceptr_t>(ptr),
4706  (unsigned char)value & 0xFF, count, hipStream));
4707  if (event) {
4708  result = event_ptr->record();
4709  *event = event_ptr.release();
4710  }
4711  } catch (pi_result err) {
4712  result = err;
4713  }
4714 
4715  return result;
4716 }
4717 
4719  void *dst_ptr, const void *src_ptr,
4720  size_t size,
4721  pi_uint32 num_events_in_waitlist,
4722  const pi_event *events_waitlist,
4723  pi_event *event) {
4724 
4725  assert(queue != nullptr);
4726  assert(dst_ptr != nullptr);
4727  assert(src_ptr != nullptr);
4728  hipStream_t hipStream = queue->get();
4729  pi_result result = PI_SUCCESS;
4730  std::unique_ptr<_pi_event> event_ptr{nullptr};
4731 
4732  try {
4733  ScopedContext active(queue->get_context());
4734  result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist,
4735  events_waitlist, nullptr);
4736  if (event) {
4737  event_ptr = std::unique_ptr<_pi_event>(
4739  event_ptr->start();
4740  }
4741  result = PI_CHECK_ERROR(
4742  hipMemcpyAsync(dst_ptr, src_ptr, size, hipMemcpyDefault, hipStream));
4743  if (event) {
4744  result = event_ptr->record();
4745  }
4746  if (blocking) {
4747  result = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
4748  }
4749  if (event) {
4750  *event = event_ptr.release();
4751  }
4752  } catch (pi_result err) {
4753  result = err;
4754  }
4755 
4756  return result;
4757 }
4758 
4760  size_t size, pi_usm_migration_flags flags,
4761  pi_uint32 num_events_in_waitlist,
4762  const pi_event *events_waitlist,
4763  pi_event *event) {
4764 
4765  // flags is currently unused so fail if set
4766  if (flags != 0)
4767  return PI_ERROR_INVALID_VALUE;
4768  assert(queue != nullptr);
4769  assert(ptr != nullptr);
4770  hipStream_t hipStream = queue->get();
4771  pi_result result = PI_SUCCESS;
4772  std::unique_ptr<_pi_event> event_ptr{nullptr};
4773 
4774  try {
4775  ScopedContext active(queue->get_context());
4776  result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist,
4777  events_waitlist, nullptr);
4778  if (event) {
4779  event_ptr = std::unique_ptr<_pi_event>(
4781  event_ptr->start();
4782  }
4783  result = PI_CHECK_ERROR(hipMemPrefetchAsync(
4784  ptr, size, queue->get_context()->get_device()->get(), hipStream));
4785  if (event) {
4786  result = event_ptr->record();
4787  *event = event_ptr.release();
4788  }
4789  } catch (pi_result err) {
4790  result = err;
4791  }
4792 
4793  return result;
4794 }
4795 
4798  size_t length, pi_mem_advice advice,
4799  pi_event *event) {
4800  (void)length;
4801  (void)advice;
4802 
4803  assert(queue != nullptr);
4804  assert(ptr != nullptr);
4805  // TODO implement a mapping to hipMemAdvise once the expected behaviour
4806  // of piextUSMEnqueueMemAdvise is detailed in the USM extension
4807  return hip_piEnqueueEventsWait(queue, 0, nullptr, event);
4808 
4809  return PI_SUCCESS;
4810 }
4811 
4829  pi_mem_alloc_info param_name,
4830  size_t param_value_size,
4831  void *param_value,
4832  size_t *param_value_size_ret) {
4833 
4834  assert(context != nullptr);
4835  assert(ptr != nullptr);
4836  pi_result result = PI_SUCCESS;
4837  hipPointerAttribute_t hipPointerAttributeType;
4838 
4839  try {
4840  ScopedContext active(context);
4841  switch (param_name) {
4842  case PI_MEM_ALLOC_TYPE: {
4843  unsigned int value;
4844  // do not throw if hipPointerGetAttribute returns hipErrorInvalidValue
4845  hipError_t ret = hipPointerGetAttributes(&hipPointerAttributeType, ptr);
4846  if (ret == hipErrorInvalidValue) {
4847  // pointer not known to the HIP subsystem
4848  return getInfo(param_value_size, param_value, param_value_size_ret,
4850  }
4851  result = check_error(ret, __func__, __LINE__ - 5, __FILE__);
4852  value = hipPointerAttributeType.isManaged;
4853  if (value) {
4854  // pointer to managed memory
4855  return getInfo(param_value_size, param_value, param_value_size_ret,
4857  }
4858  result = PI_CHECK_ERROR(
4859  hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4860  value = hipPointerAttributeType.memoryType;
4861  assert(value == hipMemoryTypeDevice or value == hipMemoryTypeHost);
4862  if (value == hipMemoryTypeDevice) {
4863  // pointer to device memory
4864  return getInfo(param_value_size, param_value, param_value_size_ret,
4866  }
4867  if (value == hipMemoryTypeHost) {
4868  // pointer to host memory
4869  return getInfo(param_value_size, param_value, param_value_size_ret,
4871  }
4872  // should never get here
4873  __builtin_unreachable();
4874  return getInfo(param_value_size, param_value, param_value_size_ret,
4876  }
4877  case PI_MEM_ALLOC_BASE_PTR: {
4878  return PI_ERROR_INVALID_VALUE;
4879  }
4880  case PI_MEM_ALLOC_SIZE: {
4881  return PI_ERROR_INVALID_VALUE;
4882  }
4883 
4884  case PI_MEM_ALLOC_DEVICE: {
4885  // get device index associated with this pointer
4886  result = PI_CHECK_ERROR(
4887  hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4888  int device_idx = hipPointerAttributeType.device;
4889 
4890  // currently each device is in its own platform, so find the platform at
4891  // the same index
4892  std::vector<pi_platform> platforms;
4893  platforms.resize(device_idx + 1);
4894  result = hip_piPlatformsGet(device_idx + 1, platforms.data(), nullptr);
4895 
4896  // get the device from the platform
4897  pi_device device = platforms[device_idx]->devices_[0].get();
4898  return getInfo(param_value_size, param_value, param_value_size_ret,
4899  device);
4900  }
4901  }
4902  } catch (pi_result error) {
4903  result = error;
4904  }
4905 
4906  return result;
4907 }
4908 
4909 // This API is called by Sycl RT to notify the end of the plugin lifetime.
4910 // TODO: add a global variable lifetime management code here (see
4911 // pi_level_zero.cpp for reference) Currently this is just a NOOP.
4912 pi_result hip_piTearDown(void *PluginParameter) {
4913  (void)PluginParameter;
4914  return PI_SUCCESS;
4915 }
4916 
4918 
4920  // Check that the major version matches in PiVersion and SupportedVersion
4922 
4923  // PI interface supports higher version or the same version.
4924  size_t PluginVersionSize = sizeof(PluginInit->PluginVersion);
4925  if (strlen(SupportedVersion) >= PluginVersionSize)
4926  return PI_ERROR_INVALID_VALUE;
4927  strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize);
4928 
4929  // Set whole function table to zero to make it easier to detect if
4930  // functions are not set up below.
4931  std::memset(&(PluginInit->PiFunctionTable), 0,
4932  sizeof(PluginInit->PiFunctionTable));
4933 
4934 // Forward calls to HIP RT.
4935 #define _PI_CL(pi_api, hip_api) \
4936  (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&hip_api);
4937 
4938  // Platform
4941  // Device
4952  // Context
4961  // Queue
4970  // Memory
4980  // Program
4994  // Kernel
5006  // Event
5017  // Sampler
5022  // Queue commands
5040  // USM
5050 
5053  _PI_CL(piPluginGetLastError, hip_piPluginGetLastError)
5055 
5056 #undef _PI_CL
5057 
5058  return PI_SUCCESS;
5059 }
5060 
5061 } // extern "C"
_pi_sampler::context_
pi_context context_
Definition: pi_cuda.hpp:947
PI_COMMAND_TYPE_USER
@ PI_COMMAND_TYPE_USER
Definition: pi.h:373
setErrorMessage
static void setErrorMessage(const char *message, pi_result error_code)
Definition: pi_esimd_emulator.cpp:156
PI_PROFILING_INFO_COMMAND_START
@ PI_PROFILING_INFO_COMMAND_START
Definition: pi.h:516
piEventGetProfilingInfo
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1385
hip_piextUSMEnqueueMemset
pi_result hip_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Definition: pi_hip.cpp:4683
PI_DEVICE_INFO_HOST_UNIFIED_MEMORY
@ PI_DEVICE_INFO_HOST_UNIFIED_MEMORY
Definition: pi.h:224
PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT
#define PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT
Extension to denote native support of assert feature by an arbitrary device piDeviceGetInfo call shou...
Definition: pi.h:721
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_esimd_emulator.cpp:1343
PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR
Definition: pi.h:185
hip_piEnqueueMemImageFill
pi_result hip_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
\TODO Not implemented in HIP.
Definition: pi_hip.cpp:4451
PI_DEVICE_INFO_OPENCL_C_VERSION
@ PI_DEVICE_INFO_OPENCL_C_VERSION
Definition: pi.h:242
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:221
pi_buff_rect_region_struct::depth_scalar
size_t depth_scalar
Definition: pi.h:827
piEventRelease
pi_result piEventRelease(pi_event event)
Definition: pi_esimd_emulator.cpp:1433
PI_MAP_WRITE
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:536
PI_USM_ACCESS
@ PI_USM_ACCESS
Definition: pi.h:1589
PI_DEVICE_INFO_PROFILE
@ PI_DEVICE_INFO_PROFILE
Definition: pi.h:240
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1758
piMemGetInfo
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1073
_pi_mem_type
_pi_mem_type
Definition: pi.h:395
hip_piEnqueueNativeKernel
pi_result hip_piEnqueueNativeKernel(pi_queue queue, void(*user_func)(void *), void *args, size_t cb_args, pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
\TODO Not implemented
Definition: pi_hip.cpp:2721
PI_KERNEL_INFO_REFERENCE_COUNT
@ PI_KERNEL_INFO_REFERENCE_COUNT
Definition: pi.h:321
PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH
@ PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH
Definition: pi.h:207
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR
Definition: pi.h:192
_pi_mem::mem_::surface_mem_::get_surface
CUsurfObject get_surface() const noexcept
Definition: pi_cuda.hpp:324
hip_piEnqueueMemBufferMap
pi_result hip_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Implements mapping on the host using a BufferRead operation.
Definition: pi_hip.cpp:4475
imageElementByteSize
static size_t imageElementByteSize(hipArray_Format array_format)
Definition: pi_hip.cpp:4148
ReleaseGuard::ReleaseGuard
ReleaseGuard(T Obj)
Obj can be nullptr.
Definition: pi_hip.cpp:653
PI_DEVICE_INFO_DOUBLE_FP_CONFIG
@ PI_DEVICE_INFO_DOUBLE_FP_CONFIG
Definition: pi.h:183
hip_piProgramCompile
pi_result hip_piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
Creates a new program that is the outcome of the compilation of the headers and the program.
Definition: pi_hip.cpp:3082
_pi_context_info
_pi_context_info
Definition: pi.h:298
pi_buff_rect_region_struct::height_scalar
size_t height_scalar
Definition: pi.h:826
PI_MEM_TYPE_IMAGE1D
@ PI_MEM_TYPE_IMAGE1D
Definition: pi.h:400
_pi_context::kind::primary
@ primary
pi_buff_rect_offset_struct
Definition: pi.h:815
_pi_context::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_cuda.hpp:205
_pi_program::context_
_pi_context * context_
Definition: pi_cuda.hpp:724
piextUSMFree
pi_result piextUSMFree(pi_context context, void *ptr)
Indicates that the allocated USM memory is no longer needed on the runtime side.
Definition: pi_esimd_emulator.cpp:1888
piKernelSetArg
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Definition: pi_esimd_emulator.cpp:1347
_pi_event::get_context
pi_context get_context() const noexcept
Definition: pi_cuda.hpp:630
hip_piEventSetStatus
pi_result hip_piEventSetStatus(pi_event event, pi_int32 execution_status)
Definition: pi_hip.cpp:3508
PI_KERNEL_INFO_ATTRIBUTES
@ PI_KERNEL_INFO_ATTRIBUTES
Definition: pi.h:324
hip_piextProgramCreateWithNativeHandle
pi_result hip_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_program *program)
Created a PI program object from a HIP program handle.
Definition: pi_hip.cpp:3197
_pi_mem::mem_::buffer_mem_::hostPtr_
void * hostPtr_
Pointer associated with this device on the host.
Definition: pi_cuda.hpp:250
_pi_context::deviceId_
_pi_device * deviceId_
Definition: pi_cuda.hpp:172
PI_DEVICE_INFO_DRIVER_VERSION
@ PI_DEVICE_INFO_DRIVER_VERSION
Definition: pi.h:239
PI_DEVICE_INFO_GPU_EU_COUNT
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:263
pi_bool
pi_uint32 pi_bool
Definition: pi.h:96
_pi_queue::context_
_pi_context * context_
Definition: pi_cuda.hpp:404
PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT
@ PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT
Definition: pi.h:452
PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC
@ PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC
Definition: pi.h:245
T
pi_hip.hpp
_pi_image_format::image_channel_data_type
pi_image_channel_type image_channel_data_type
Definition: pi.h:893
hip_piQueueCreate
pi_result hip_piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Creates a pi_queue object on the HIP backend.
Definition: pi_hip.cpp:2190
hip_piextUSMGetMemAllocInfo
pi_result hip_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_hip.cpp:4828
piPluginGetLastError
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
Definition: pi_esimd_emulator.cpp:164
ErrorMessage
thread_local char ErrorMessage[MaxMessageSize]
Definition: pi_esimd_emulator.cpp:153
PI_QUEUE_INFO_CONTEXT
@ PI_QUEUE_INFO_CONTEXT
Definition: pi.h:310
hip_piEventGetProfilingInfo
pi_result hip_piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Obtain profiling information from PI HIP events Timings from HIP are only elapsed time.
Definition: pi_hip.cpp:3465
ReleaseGuard::ReleaseGuard
ReleaseGuard(ReleaseGuard &&Other) noexcept
Definition: pi_hip.cpp:654
_pi_sampler::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_cuda.hpp:952
_pi_queue::get
native_type get()
Definition: pi_cuda.hpp:456
hip_piContextRelease
pi_result hip_piContextRelease(pi_context ctxt)
Definition: pi_hip.cpp:1825
hip_piEventGetInfo
pi_result hip_piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:3434
PI_DEVICE_INFO_NAME
@ PI_DEVICE_INFO_NAME
Definition: pi.h:237
hip_piContextRetain
pi_result hip_piContextRetain(pi_context context)
Definition: pi_hip.cpp:875
hip_piextEventGetNativeHandle
pi_result hip_piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native HIP handle of a PI event object.
Definition: pi_hip.cpp:3615
hip_piextUSMEnqueueMemAdvise
pi_result hip_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM: memadvise API to govern behavior of automatic migration mechanisms.
Definition: pi_hip.cpp:4797
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:794
PI_IMAGE_CHANNEL_TYPE_UNORM_INT8
@ PI_IMAGE_CHANNEL_TYPE_UNORM_INT8
Definition: pi.h:441
PI_PROFILING_INFO_COMMAND_SUBMIT
@ PI_PROFILING_INFO_COMMAND_SUBMIT
Definition: pi.h:515
_pi_sampler::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_cuda.hpp:954
PI_PROGRAM_INFO_BINARY_SIZES
@ PI_PROGRAM_INFO_BINARY_SIZES
Definition: pi.h:292
PI_MEM_FLAGS_HOST_PTR_COPY
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY
Definition: pi.h:530
_pi_image_format::image_channel_order
pi_image_channel_order image_channel_order
Definition: pi.h:892
PI_MEM_ALLOC_SIZE
@ PI_MEM_ALLOC_SIZE
Definition: pi.h:1598
_pi_context::get
native_type get() const noexcept
Definition: pi_cuda.hpp:199
PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN
@ PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN
Definition: pi.h:249
piextProgramGetNativeHandle
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_esimd_emulator.cpp:1334
cl::sycl::detail::pi::assertion
void assertion(bool Condition, const char *Message=nullptr)
Definition: pi.cpp:545
hip_piextKernelSetArgMemObj
pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
Definition: pi_hip.cpp:2540
_pi_image_desc::image_type
pi_mem_type image_type
Definition: pi.h:897
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1330
_pi_plugin
Definition: pi.h:1776
_pi_program::get_context
pi_context get_context() const
Definition: pi_cuda.hpp:746
PI_PROGRAM_INFO_SOURCE
@ PI_PROGRAM_INFO_SOURCE
Definition: pi.h:291
_pi_program::MAX_LOG_SIZE
constexpr static size_t MAX_LOG_SIZE
Definition: pi_cuda.hpp:730
PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE
Definition: pi.h:215
hip_piEnqueueMemImageCopy
pi_result hip_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:4377
piDevicePartition
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Definition: pi_esimd_emulator.cpp:813
PI_KERNEL_COMPILE_NUM_SUB_GROUPS
@ PI_KERNEL_COMPILE_NUM_SUB_GROUPS
Definition: pi.h:351
hip_piextGetDeviceFunctionPointer
pi_result hip_piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *func_pointer_ret)
Definition: pi_hip.cpp:939
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:405
PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
Definition: pi.h:179
piEnqueueMemBufferCopy
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1566
PI_DEVICE_INFO_MAX_COMPUTE_UNITS
@ PI_DEVICE_INFO_MAX_COMPUTE_UNITS
Definition: pi.h:177
ReleaseGuard
RAII object that calls the reference count release function on the held PI object on destruction.
Definition: pi_cuda.cpp:726
PI_EVENT_INFO_CONTEXT
@ PI_EVENT_INFO_CONTEXT
Definition: pi.h:357
_pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr
@ alloc_host_ptr
_pi_plugin::PluginVersion
char PluginVersion[20]
Definition: pi.h:1786
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
Definition: pi.h:275
_pi_mem::mem_::buffer_mem_::get_void
void * get_void() const noexcept
Definition: pi_hip.hpp:251
_pi_result
_pi_result
Definition: pi.h:105
hip_piextMemCreateWithNativeHandle
pi_result hip_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *mem)
Created a PI mem object from a HIP mem handle.
Definition: pi_hip.cpp:2170
PI_PROFILING_INFO_COMMAND_QUEUED
@ PI_PROFILING_INFO_COMMAND_QUEUED
Definition: pi.h:514
piextQueueGetNativeHandle
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle)
Gets the native handle of a PI queue object.
Definition: pi_esimd_emulator.cpp:991
piTearDown
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_esimd_emulator.cpp:1969
hip_piPlatformGetInfo
pi_result hip_piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:781
ReleaseGuard::~ReleaseGuard
~ReleaseGuard()
Calls the related PI object release function if the object held is not nullptr or if dismiss has not ...
Definition: pi_hip.cpp:662
ErrorMessageCode
thread_local pi_result ErrorMessageCode
Definition: pi_esimd_emulator.cpp:152
pi_context_properties
intptr_t pi_context_properties
Definition: pi.h:485
piEnqueueMemUnmap
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1629
PI_SAMPLER_INFO_FILTER_MODE
@ PI_SAMPLER_INFO_FILTER_MODE
Definition: pi.h:466
hip_piextContextGetNativeHandle
pi_result hip_piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native HIP handle of a PI context object.
Definition: pi_hip.cpp:1873
PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE
@ PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE
Definition: pi.h:329
PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8
Definition: pi.h:446
_pi_queue::device_
_pi_device * device_
Definition: pi_cuda.hpp:405
piSamplerGetInfo
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1467
_pi_event::get_queued_time
pi_uint64 get_queued_time() const
Definition: pi_cuda.cpp:523
piPlatformGetInfo
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:443
PI_DEVICE_INFO_GPU_SLICES
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:265
piDeviceRetain
pi_result piDeviceRetain(pi_device device)
Definition: pi_esimd_emulator.cpp:571
hip_piQueueFinish
pi_result hip_piQueueFinish(pi_queue command_queue)
Definition: pi_hip.cpp:2284
PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE
@ PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE
Definition: pi.h:201
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
_pi_device_type
_pi_device_type
Definition: pi.h:152
hip_piKernelGetGroupInfo
pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:3247
cl::sycl::info::device_type
device_type
Definition: info_desc.hpp:180
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:858
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:963
_pi_mem::mem_::buffer_mem_::get_map_offset
size_t get_map_offset(void *) const noexcept
Definition: pi_cuda.hpp:280
_pi_mem::context_
pi_context context_
Definition: pi_cuda.hpp:227
PI_DEVICE_INFO_REFERENCE_COUNT
@ PI_DEVICE_INFO_REFERENCE_COUNT
Definition: pi.h:235
cl::sycl::ext::intel::experimental::esimd::line
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > line(sycl::ext::intel::esimd::simd< T, 4 > src0, sycl::ext::intel::esimd::simd< T, SZ > src1, Sat sat={})
Linear equation.
Definition: math.hpp:900
PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT
@ PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT
Definition: pi.h:257
_pi_mem::is_buffer
bool is_buffer() const noexcept
Definition: pi_cuda.hpp:373
hip_piSamplerCreate
pi_result hip_piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
Creates a PI sampler object.
Definition: pi_hip.cpp:3652
hip_piEnqueueKernelLaunch
pi_result hip_piEnqueueKernelLaunch(pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:2592
PI_DEVICE_INFO_PLATFORM
@ PI_DEVICE_INFO_PLATFORM
Definition: pi.h:234
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:267
PI_DEVICE_LOCAL_MEM_TYPE_LOCAL
@ PI_DEVICE_LOCAL_MEM_TYPE_LOCAL
Definition: pi.h:170
PI_PROGRAM_BUILD_INFO_OPTIONS
@ PI_PROGRAM_BUILD_INFO_OPTIONS
Definition: pi.h:130
PI_PROGRAM_BUILD_STATUS_SUCCESS
@ PI_PROGRAM_BUILD_STATUS_SUCCESS
Definition: pi.h:138
commonEnqueueMemBufferCopyRect
static pi_result commonEnqueueMemBufferCopyRect(hipStream_t hip_stream, pi_buff_rect_region region, const void *src_ptr, const hipMemoryType src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, const hipMemoryType dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch)
General 3D memory copy operation.
Definition: pi_hip.cpp:3786
hip_piEnqueueMemBufferFill
pi_result hip_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:4035
piextDeviceCreateWithNativeHandle
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform platform, pi_device *device)
Creates PI device object from a native handle.
Definition: pi_esimd_emulator.cpp:822
sycl
Definition: invoke_simd.hpp:68
hip_piextProgramGetNativeHandle
pi_result hip_piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native HIP handle of a PI program object.
Definition: pi_hip.cpp:3180
hip_definitions.hpp
piKernelGetSubGroupInfo
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
Definition: pi_esimd_emulator.cpp:1369
PI_DEVICE_INFO_ADDRESS_BITS
@ PI_DEVICE_INFO_ADDRESS_BITS
Definition: pi.h:200
_pi_mem::mem_::surface_mem_::get_array
CUarray get_array() const noexcept
Definition: pi_cuda.hpp:322
_pi_image_desc::image_height
size_t image_height
Definition: pi.h:899
_pi_platform
A PI platform stores all known PI devices, in the CUDA plugin this is just a vector of available devi...
Definition: pi_cuda.hpp:72
cl::sycl::host_ptr
multi_ptr< ElementType, access::address_space::ext_intel_global_host_space > host_ptr
Definition: pointers.hpp:33
PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:301
PI_DEVICE_TYPE_DEFAULT
@ PI_DEVICE_TYPE_DEFAULT
The default device available in the PI plugin.
Definition: pi.h:153
hip_piMemImageGetInfo
pi_result hip_piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
\TODO Not implemented
Definition: pi_hip.cpp:2897
PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
@ PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
Definition: pi.h:332
PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE
Definition: pi.h:190
PI_DEVICE_INFO_MAX_SAMPLERS
@ PI_DEVICE_INFO_MAX_SAMPLERS
Definition: pi.h:212
PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE
@ PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE
Definition: pi.h:331
piEnqueueMemImageFill
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1746
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
@ PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
Definition: pi.h:251
PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN
@ PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN
Definition: pi.h:214
piSamplerCreate
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
Definition: pi_esimd_emulator.cpp:1462
hip_piEnqueueMemBufferWrite
pi_result hip_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:2355
hip_piEventSetCallback
pi_result hip_piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, pfn_notify notify, void *user_data)
Definition: pi_hip.cpp:3496
piextEventCreateWithNativeHandle
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *event)
Creates PI event object from a native handle.
Definition: pi_esimd_emulator.cpp:1458
PI_DEVICE_INFO_BUILT_IN_KERNELS
@ PI_DEVICE_INFO_BUILT_IN_KERNELS
Definition: pi.h:233
_pi_sampler_addressing_mode
_pi_sampler_addressing_mode
Definition: pi.h:472
piKernelRelease
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1377
PI_DEVICE_INFO_USM_DEVICE_SUPPORT
@ PI_DEVICE_INFO_USM_DEVICE_SUPPORT
Definition: pi.h:255
hip_piProgramCreateWithBinary
pi_result hip_piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *program)
Loads images from a list of PTX or HIPBIN binaries.
Definition: pi_hip.cpp:2981
PI_KERNEL_INFO_PROGRAM
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:323
hip_piEnqueueMemUnmap
pi_result hip_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Implements the unmap from the host, using a BufferWrite operation.
Definition: pi_hip.cpp:4535
hip_piProgramRetain
pi_result hip_piProgramRetain(pi_program program)
Definition: pi_hip.cpp:3135
PI_PLATFORM_INFO_NAME
@ PI_PLATFORM_INFO_NAME
Definition: pi.h:122
PI_FP_ROUND_TO_NEAREST
static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST
Definition: pi.h:625
pi.hpp
piextUSMSharedAlloc
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_esimd_emulator.cpp:1849
PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE
@ PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE
Definition: pi.h:211
_pi_device_info
_pi_device_info
Definition: pi.h:174
PI_MEM_TYPE_DEVICE
@ PI_MEM_TYPE_DEVICE
Definition: pi.h:1605
PI_USM_ATOMIC_ACCESS
@ PI_USM_ATOMIC_ACCESS
Definition: pi.h:1590
PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE
@ PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE
Definition: pi.h:219
_pi_image_info
_pi_image_info
Definition: pi.h:338
hip_piDevicePartition
pi_result hip_piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Not applicable to HIP, devices cannot be partitioned.
Definition: pi_hip.cpp:891
getKernelNames
std::string getKernelNames(pi_program program)
Finds kernel names by searching for entry points in the PTX source, as the HIP driver API doesn't exp...
Definition: pi_hip.cpp:608
piEventSetStatus
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
Definition: pi_esimd_emulator.cpp:1421
_pi_program::build_program
pi_result build_program(const char *build_options)
Definition: pi_cuda.cpp:675
piextUSMDeviceAlloc
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_esimd_emulator.cpp:1844
_PI_HIP_PLUGIN_VERSION_STRING
#define _PI_HIP_PLUGIN_VERSION_STRING
Definition: pi_hip.hpp:25
hip_piextDeviceGetNativeHandle
pi_result hip_piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native HIP handle of a PI device object.
Definition: pi_hip.cpp:1696
PI_SAMPLER_PROPERTIES_ADDRESSING_MODE
constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE
Definition: pi.h:496
PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT
Definition: pi.h:187
PI_SAMPLER_FILTER_MODE_NEAREST
@ PI_SAMPLER_FILTER_MODE_NEAREST
Definition: pi.h:481
hip_piextUSMFree
pi_result hip_piextUSMFree(pi_context context, void *ptr)
USM: Frees the given USM pointer associated with the context.
Definition: pi_hip.cpp:4659
hip_piextEventCreateWithNativeHandle
pi_result hip_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *event)
Created a PI event object from a HIP event handle.
Definition: pi_hip.cpp:3629
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:773
PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT
@ PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT
Definition: pi.h:223
_pi_sampler::props_
pi_uint32 props_
Definition: pi_cuda.hpp:946
piextDeviceGetNativeHandle
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
Definition: pi_esimd_emulator.cpp:818
PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT
@ PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT
Definition: pi.h:206
cl::sycl::info::event
event
Definition: info_desc.hpp:289
hip_piEnqueueMemBufferWriteRect
pi_result hip_piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:3891
hip_piSamplerGetInfo
pi_result hip_piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Gets information from a PI sampler object.
Definition: pi_hip.cpp:3711
pi_buff_rect_offset_struct::y_scalar
size_t y_scalar
Definition: pi.h:817
PI_EVENT_INFO_COMMAND_TYPE
@ PI_EVENT_INFO_COMMAND_TYPE
Definition: pi.h:358
PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
constexpr pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:560
piextContextCreateWithNativeHandle
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 numDevices, const pi_device *devices, bool pluginOwnsNativeHandle, pi_context *context)
Creates PI context object from a native handle.
Definition: pi_esimd_emulator.cpp:872
cl::sycl::info::queue
queue
Definition: info_desc.hpp:229
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1302
hip_piQueueRetain
pi_result hip_piQueueRetain(pi_queue command_queue)
Definition: pi_hip.cpp:2252
_pi_mem::mem_::buffer_mem_::alloc_mode
alloc_mode
alloc_mode classic: Just a normal buffer allocated on the device via cuda malloc use_host_ptr: Use an...
Definition: pi_cuda.hpp:267
_pi_plugin::PiVersion
char PiVersion[20]
Definition: pi.h:1784
PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE
Definition: pi.h:217
PI_DEVICE_EXEC_CAPABILITIES_KERNEL
constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL
Definition: pi.h:488
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
PI_CONTEXT_INFO_DEVICES
@ PI_CONTEXT_INFO_DEVICES
Definition: pi.h:299
_pi_sampler_filter_mode
_pi_sampler_filter_mode
Definition: pi.h:480
_pi_queue_info
_pi_queue_info
Definition: pi.h:309
pi_buff_rect_region_struct::width_bytes
size_t width_bytes
Definition: pi.h:825
hip_piextMemGetNativeHandle
pi_result hip_piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native HIP handle of a PI mem object.
Definition: pi_hip.cpp:2133
_pi_buffer_create_type
_pi_buffer_create_type
Definition: pi.h:456
_pi_queue::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_cuda.hpp:575
hip_piextUSMSharedAlloc
pi_result hip_piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
USM: Implements USM Shared allocations using HIP Managed Memory.
Definition: pi_hip.cpp:4634
_pi_program::buildOptions_
std::string buildOptions_
Definition: pi_cuda.hpp:733
piextUSMEnqueueMemcpy
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_esimd_emulator.cpp:1921
piMemBufferPartition
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_esimd_emulator.cpp:1752
_pi_mem::mem_type_
enum _pi_mem::mem_type mem_type_
hip_piEnqueueMemImageRead
pi_result hip_piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:4242
_pi_queue::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_cuda.hpp:579
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF
Definition: pi.h:198
piextUSMEnqueueMemAdvise
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_esimd_emulator.cpp:1926
piQueueCreate
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Definition: pi_esimd_emulator.cpp:923
hip_piProgramGetInfo
pi_result hip_piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:3020
_pi_queue::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_cuda.hpp:577
PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE
@ PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE
Definition: pi.h:210
_pi_program::buildStatus_
pi_program_build_status buildStatus_
Definition: pi_cuda.hpp:734
PI_DEVICE_INFO_LOCAL_MEM_SIZE
@ PI_DEVICE_INFO_LOCAL_MEM_SIZE
Definition: pi.h:222
piextProgramCreateWithNativeHandle
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool pluginOwnsNativeHandle, pi_program *program)
Creates PI program object from a native handle.
Definition: pi_esimd_emulator.cpp:1338
cl::sycl::length
float length(T p) __NOEXC
Definition: builtins.hpp:1032
piEventsWait
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_esimd_emulator.cpp:1398
_pi_program::module_
native_type module_
Definition: pi_cuda.hpp:720
piContextRelease
pi_result piContextRelease(pi_context context)
Definition: pi_esimd_emulator.cpp:888
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:392
commonEnqueueMemImageNDCopy
static pi_result commonEnqueueMemImageNDCopy(hipStream_t hip_stream, pi_mem_type img_type, const size_t *region, const void *src_ptr, const hipMemoryType src_type, const size_t *src_offset, void *dst_ptr, const hipMemoryType dst_type, const size_t *dst_offset)
General ND memory copy operation for images (where N > 1).
Definition: pi_hip.cpp:4174
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1332
PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16
Definition: pi.h:447
hip_piPlatformsGet
pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
Obtains the HIP platform.
Definition: pi_hip.cpp:700
PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32
Definition: pi.h:448
PI_FALSE
const pi_bool PI_FALSE
Definition: pi.h:459
_pi_event::make_native
static pi_event make_native(pi_command_type type, pi_queue queue, CUstream stream, pi_uint32 stream_token=std::numeric_limits< pi_uint32 >::max())
Definition: pi_cuda.hpp:654
PI_MAP_READ
constexpr pi_map_flags PI_MAP_READ
Definition: pi.h:535
PI_DEVICE_TYPE_GPU
@ PI_DEVICE_TYPE_GPU
A PI device that is a GPU.
Definition: pi.h:157
pi_uint32
uint32_t pi_uint32
Definition: pi.h:94
_pi_image_desc::image_depth
size_t image_depth
Definition: pi.h:900
_pi_mem::mem_::buffer_mem_::ptr_
native_type ptr_
Definition: pi_cuda.hpp:247
_pi_mem::mem_::buffer_mem_::get_map_flags
pi_map_flags get_map_flags() const noexcept
Definition: pi_cuda.hpp:310
PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT
@ PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT
Definition: pi.h:370
piEnqueueMemBufferReadRect
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1544
pi_buff_rect_region_struct
Definition: pi.h:824
piMemRelease
pi_result piMemRelease(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1085
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:758
piEnqueueMemBufferCopyRect
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1572
hip_piextContextCreateWithNativeHandle
pi_result hip_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *context)
Created a PI context object from a HIP context handle.
Definition: pi_hip.cpp:1887
hip_piProgramCreate
pi_result hip_piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
\TODO Not implemented
Definition: pi_hip.cpp:2964
hip_piEnqueueMemBufferRead
pi_result hip_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:2401
_pi_mem::mem_::surface_mem_
struct _pi_mem::mem_::surface_mem_ surface_mem_
hip_piKernelSetArg
pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Definition: pi_hip.cpp:2523
piextContextSetExtendedDeleter
pi_result piextContextSetExtendedDeleter(pi_context context, pi_context_extended_deleter func, void *user_data)
Definition: pi_esimd_emulator.cpp:863
_pi_mem::mem_::buffer_mem_::get_with_offset
native_type get_with_offset(size_t offset) const noexcept
Definition: pi_hip.hpp:246
PI_DEVICE_INFO_LINKER_AVAILABLE
@ PI_DEVICE_INFO_LINKER_AVAILABLE
Definition: pi.h:229
PI_DEVICE_INFO_EXECUTION_CAPABILITIES
@ PI_DEVICE_INFO_EXECUTION_CAPABILITIES
Definition: pi.h:230
PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION
@ PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION
Definition: pi.h:225
_pi_kernel_exec_info
_pi_kernel_exec_info
Definition: pi.h:1282
_pi_queue::properties_
pi_queue_properties properties_
Definition: pi_cuda.hpp:406
piclProgramCreateWithSource
pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *ret_program)
Definition: pi_esimd_emulator.cpp:1297
PI_MEM_TYPE_IMAGE2D
@ PI_MEM_TYPE_IMAGE2D
Definition: pi.h:397
_pi_program::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_cuda.hpp:754
PI_COMMAND_TYPE_IMAGE_COPY
@ PI_COMMAND_TYPE_IMAGE_COPY
Definition: pi.h:377
PI_KERNEL_INFO_CONTEXT
@ PI_KERNEL_INFO_CONTEXT
Definition: pi.h:322
piEnqueueMemBufferWrite
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1552
PI_PROGRAM_BUILD_INFO_STATUS
@ PI_PROGRAM_BUILD_INFO_STATUS
Definition: pi.h:129
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_esimd_emulator.cpp:1284
_pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr
@ use_host_ptr
_pi_mem::mem_::buffer_mem_::get
native_type get() const noexcept
Definition: pi_cuda.hpp:274
cl::sycl::detail::pi::die
void die(const char *Message)
Definition: pi.cpp:540
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_esimd_emulator.cpp:1936
PI_DEVICE_INFO_HALF_FP_CONFIG
@ PI_DEVICE_INFO_HALF_FP_CONFIG
Definition: pi.h:182
PI_FP_DENORM
static constexpr pi_device_fp_config PI_FP_DENORM
Definition: pi.h:623
pi_mem_flags
pi_bitfield pi_mem_flags
Definition: pi.h:524
_pi_event_status
_pi_event_status
Definition: pi.h:113
piKernelGetGroupInfo
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1364
PI_IMAGE_CHANNEL_TYPE_FLOAT
@ PI_IMAGE_CHANNEL_TYPE_FLOAT
Definition: pi.h:453
piEnqueueMemImageRead
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1669
hip_piDeviceGetInfo
pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:969
hip_piTearDown
pi_result hip_piTearDown(void *PluginParameter)
Definition: pi_hip.cpp:4912
PI_USM_CONCURRENT_ACCESS
@ PI_USM_CONCURRENT_ACCESS
Definition: pi.h:1591
PI_MEM_TYPE_SHARED
@ PI_MEM_TYPE_SHARED
Definition: pi.h:1606
PI_DEVICE_INFO_VENDOR
@ PI_DEVICE_INFO_VENDOR
Definition: pi.h:238
__SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN
#define __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN
Definition: pi.h:716
pi_context_extended_deleter
void(* pi_context_extended_deleter)(void *user_data)
Definition: pi.h:1048
PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES
@ PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES
Definition: pi.h:231
PI_KERNEL_GROUP_INFO_NUM_REGS
@ PI_KERNEL_GROUP_INFO_NUM_REGS
Definition: pi.h:335
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME
#define __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(parameter)
Definition: pi.hpp:96
hip_piextQueueCreateWithNativeHandle
pi_result hip_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, pi_device device, bool ownNativeHandle, pi_queue *queue)
Created a PI queue object from a HIP queue handle.
Definition: pi_hip.cpp:2340
hip_piextDeviceCreateWithNativeHandle
pi_result hip_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform platform, pi_device *device)
Created a PI device object from a HIP device handle.
Definition: pi_hip.cpp:1711
PI_KERNEL_MAX_SUB_GROUP_SIZE
@ PI_KERNEL_MAX_SUB_GROUP_SIZE
Definition: pi.h:349
_pi_context::kind::user_defined
@ user_defined
hip_piSamplerRetain
pi_result hip_piSamplerRetain(pi_sampler sampler)
Retains a PI sampler object, incrementing its reference count.
Definition: pi_hip.cpp:3752
hip_piMemGetInfo
pi_result hip_piMemGetInfo(pi_mem memObj, pi_mem_info queriedInfo, size_t expectedQuerySize, void *queryOutput, size_t *writtenQuerySize)
Definition: pi_hip.cpp:2115
hip_piEnqueueMemBufferCopyRect
pi_result hip_piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:3989
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:955
defines.hpp
PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT
@ PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT
Definition: pi.h:258
PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT
Definition: pi.h:186
piEnqueueEventsWaitWithBarrier
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1481
piEventCreate
decltype(piEventCreate) piEventCreate
Definition: pi_level_zero.cpp:1865
PI_SAMPLER_INFO_CONTEXT
@ PI_SAMPLER_INFO_CONTEXT
Definition: pi.h:463
piKernelRetain
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1375
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_esimd_emulator.cpp:591
piEventSetCallback
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE
Definition: pi.h:216
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG
Definition: pi.h:195
piEnqueueMemBufferFill
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1580
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
PI_DEVICE_INFO_USM_HOST_SUPPORT
@ PI_DEVICE_INFO_USM_HOST_SUPPORT
Definition: pi.h:254
piSamplerRetain
pi_result piSamplerRetain(pi_sampler sampler)
Definition: pi_esimd_emulator.cpp:1472
piMemImageGetInfo
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1665
hip_piEnqueueMemBufferCopy
pi_result hip_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_hip.cpp:3942
hip_piQueueRelease
pi_result hip_piQueueRelease(pi_queue command_queue)
Definition: pi_hip.cpp:2260
PI_MEM_FLAGS_HOST_PTR_USE
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:529
piextEventGetNativeHandle
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_esimd_emulator.cpp:1454
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT
Definition: pi.h:196
PI_PROGRAM_INFO_REFERENCE_COUNT
@ PI_PROGRAM_INFO_REFERENCE_COUNT
Definition: pi.h:287
hip_piEnqueueEventsWaitWithBarrier
pi_result hip_piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Enqueues a wait on the given queue for all specified events.
Definition: pi_hip.cpp:3572
PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16
@ PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16
Definition: pi.h:278
PI_MEM_TYPE_UNKNOWN
@ PI_MEM_TYPE_UNKNOWN
Definition: pi.h:1603
PI_DEVICE_INFO_PARENT_DEVICE
@ PI_DEVICE_INFO_PARENT_DEVICE
Definition: pi.h:246
_pi_mem::mem_::buffer_mem_::get_size
size_t get_size() const noexcept
Definition: pi_cuda.hpp:276
PI_PLATFORM_INFO_VERSION
@ PI_PLATFORM_INFO_VERSION
Definition: pi.h:125
PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
Definition: pi.h:450
PI_FP_ROUND_TO_INF
static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF
Definition: pi.h:627
pi_uint64
uint64_t pi_uint64
Definition: pi.h:95
hip_piextKernelSetArgSampler
pi_result hip_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
Definition: pi_hip.cpp:2576
PI_COMMAND_TYPE_MEM_BUFFER_WRITE
@ PI_COMMAND_TYPE_MEM_BUFFER_WRITE
Definition: pi.h:366
PI_DEVICE_INFO_PARTITION_TYPE
@ PI_DEVICE_INFO_PARTITION_TYPE
Definition: pi.h:250
PI_COMMAND_TYPE_MEM_BUFFER_COPY
@ PI_COMMAND_TYPE_MEM_BUFFER_COPY
Definition: pi.h:367
_pi_event_info
_pi_event_info
Definition: pi.h:355
piextUSMHostAlloc
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_esimd_emulator.cpp:1839
_pi_sampler_info
_pi_sampler_info
Definition: pi.h:461
_pi_device_binary_property_struct
Definition: pi.h:661
hip_piKernelGetInfo
pi_result hip_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:3211
hip_piEventsWait
pi_result hip_piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_hip.cpp:2448
_pi_event::wait
pi_result wait()
Definition: pi_cuda.cpp:580
pi_mem_properties
pi_bitfield pi_mem_properties
Definition: pi.h:540
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:718
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:306
PI_QUEUE_INFO_DEVICE
@ PI_QUEUE_INFO_DEVICE
Definition: pi.h:311
PI_PROGRAM_BUILD_STATUS_ERROR
@ PI_PROGRAM_BUILD_STATUS_ERROR
Definition: pi.h:137
_pi_sampler
Implementation of samplers for CUDA.
Definition: pi_cuda.hpp:944
PI_DEVICE_INFO_PRINTF_BUFFER_SIZE
@ PI_DEVICE_INFO_PRINTF_BUFFER_SIZE
Definition: pi.h:244
_pi_sampler::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_cuda.hpp:956
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE
Definition: pi.h:197
hip_piKernelRelease
pi_result hip_piKernelRelease(pi_kernel kernel)
Definition: pi_hip.cpp:3378
_pi_device::get
native_type get() const noexcept
Definition: pi_cuda.hpp:98
PI_COMMAND_TYPE_IMAGE_READ
@ PI_COMMAND_TYPE_IMAGE_READ
Definition: pi.h:375
piextUSMGetMemAllocInfo
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_esimd_emulator.cpp:1931
piDeviceRelease
pi_result piDeviceRelease(pi_device device)
Definition: pi_esimd_emulator.cpp:581
hip_piextProgramSetSpecializationConstant
pi_result hip_piextProgramSetSpecializationConstant(pi_program, pi_uint32, size_t, const void *)
Definition: pi_hip.cpp:3409
PI_PLATFORM_INFO_PROFILE
@ PI_PLATFORM_INFO_PROFILE
Definition: pi.h:123
ReleaseGuard::dismiss
void dismiss()
End the guard and do not release the reference count of the held PI object.
Definition: pi_hip.cpp:686
_pi_program::infoLog_
char infoLog_[MAX_LOG_SIZE]
Definition: pi_cuda.hpp:732
PI_KERNEL_MAX_NUM_SUB_GROUPS
@ PI_KERNEL_MAX_NUM_SUB_GROUPS
Definition: pi.h:350
piextUSMEnqueueMemset
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_esimd_emulator.cpp:1916
PI_DEVICE_INFO_MAX_CONSTANT_ARGS
@ PI_DEVICE_INFO_MAX_CONSTANT_ARGS
Definition: pi.h:220
_pi_event::is_completed
bool is_completed() const noexcept
Definition: pi_cuda.cpp:506
hip_piMemImageCreate
pi_result hip_piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
\TODO Not implemented
Definition: pi_hip.cpp:2743
PI_DEVICE_INFO_PCI_ADDRESS
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:262
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:98
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT
Definition: pi.h:193
cl::sycl::info::device::driver_version
@ driver_version
PI_DEVICE_INFO_GLOBAL_MEM_SIZE
@ PI_DEVICE_INFO_GLOBAL_MEM_SIZE
Definition: pi.h:218
hip_piDeviceRetain
pi_result hip_piDeviceRetain(pi_device device)
Definition: pi_hip.cpp:849
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:977
piContextRetain
pi_result piContextRetain(pi_context context)
Definition: pi_esimd_emulator.cpp:878
pi_sampler_properties
pi_bitfield pi_sampler_properties
Definition: pi.h:493
piextKernelSetArgSampler
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
Definition: pi_esimd_emulator.cpp:1356
ReleaseGuard::ReleaseGuard
ReleaseGuard()=delete
PI_KERNEL_INFO_FUNCTION_NAME
@ PI_KERNEL_INFO_FUNCTION_NAME
Definition: pi.h:319
PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE
@ PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE
Definition: pi.h:166
PI_MEM_FLAGS_ACCESS_RW
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:526
__SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY
#define __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY
Definition: hip_definitions.hpp:19
piextKernelSetArgMemObj
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
Definition: pi_esimd_emulator.cpp:1351
piMemImageCreate
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_esimd_emulator.cpp:1152
_pi_mem::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_cuda.hpp:387
_pi_event::release
pi_result release()
Definition: pi_cuda.cpp:592
_pi_image_format
Definition: pi.h:891
hip_piEventRetain
pi_result hip_piEventRetain(pi_event event)
Definition: pi_hip.cpp:3516
PI_COMMAND_TYPE_NDRANGE_KERNEL
@ PI_COMMAND_TYPE_NDRANGE_KERNEL
Definition: pi.h:364
PI_PLATFORM_INFO_VENDOR
@ PI_PLATFORM_INFO_VENDOR
Definition: pi.h:124
hip_piclProgramCreateWithSource
pi_result hip_piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *program)
Not used as HIP backend only creates programs from binary.
Definition: pi_hip.cpp:2920
_pi_mem::mem_type::buffer
@ buffer
cl::sycl::detail::pi::hipPrint
void hipPrint(const char *Message)
Definition: pi_hip.cpp:363
_pi_kernel_info
_pi_kernel_info
Definition: pi.h:318
_pi_mem::mem_::buffer_mem_::native_type
CUdeviceptr native_type
Definition: pi_cuda.hpp:241
PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT
@ PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT
Definition: pi.h:256
piPlatformsGet
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
Definition: pi_esimd_emulator.cpp:400
hip_piDeviceRelease
pi_result hip_piDeviceRelease(pi_device device)
Definition: pi_hip.cpp:964
hip_piEnqueueEventsWait
pi_result hip_piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Enqueues a wait on the given queue for all events.
Definition: pi_hip.cpp:3559
_pi_program::~_pi_program
~_pi_program()
Definition: pi_cuda.cpp:625
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:274
PI_IMAGE_CHANNEL_ORDER_RGBA
@ PI_IMAGE_CHANNEL_ORDER_RGBA
Definition: pi.h:426
cl::sycl::info::context
context
Definition: info_desc.hpp:42
hip_piKernelGetSubGroupInfo
pi_result hip_piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_hip.cpp:3318
PI_MEM_ALLOC_BASE_PTR
@ PI_MEM_ALLOC_BASE_PTR
Definition: pi.h:1597
_pi_mem::mem_::buffer_mem_::alloc_mode::copy_in
@ copy_in
_pi_program::binary_
const char * binary_
Definition: pi_cuda.hpp:721
PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES
@ PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES
Definition: pi.h:248
_pi_mem::mem_::buffer_mem_::get_map_ptr
void * get_map_ptr() const noexcept
Definition: pi_cuda.hpp:278
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(cl::sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:282
PI_PROGRAM_INFO_KERNEL_NAMES
@ PI_PROGRAM_INFO_KERNEL_NAMES
Definition: pi.h:295
hip_piextUSMEnqueueMemcpy
pi_result hip_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Definition: pi_hip.cpp:4718
_pi_event::~_pi_event
~_pi_event()
Definition: pi_cuda.cpp:481
piEnqueueMemBufferMap
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_esimd_emulator.cpp:1586
PI_SAMPLER_ADDRESSING_MODE_NONE
@ PI_SAMPLER_ADDRESSING_MODE_NONE
Definition: pi.h:477
piextMemGetNativeHandle
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_esimd_emulator.cpp:1271
hip_piProgramRelease
pi_result hip_piProgramRelease(pi_program program)
Decreases the reference count of a pi_program object.
Definition: pi_hip.cpp:3145
_pi_image_desc::image_width
size_t image_width
Definition: pi.h:898
PI_DEVICE_INFO_SINGLE_FP_CONFIG
@ PI_DEVICE_INFO_SINGLE_FP_CONFIG
Definition: pi.h:181
PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT
@ PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT
Definition: pi.h:371
_PI_PLUGIN_VERSION_CHECK
#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION)
Definition: pi.h:72
piEnqueueMemBufferRead
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1486
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:271
PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
@ PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
Definition: pi.h:178
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1360
PI_DEVICE_INFO_LOCAL_MEM_TYPE
@ PI_DEVICE_INFO_LOCAL_MEM_TYPE
Definition: pi.h:221
PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH
@ PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH
Definition: pi.h:209
_pi_event::_pi_event
_pi_event()
Definition: pi_esimd_emulator.hpp:201
PI_DEVICE_INFO_COMPILER_AVAILABLE
@ PI_DEVICE_INFO_COMPILER_AVAILABLE
Definition: pi.h:228
piEventGetInfo
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1381
piEnqueueEventsWait
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1476
PI_FP_FMA
static constexpr pi_device_fp_config PI_FP_FMA
Definition: pi.h:628
piMemRetain
pi_result piMemRetain(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1077
hip_piEventRelease
pi_result hip_piEventRelease(pi_event event)
Definition: pi_hip.cpp:3527
hip_piextDeviceSelectBinary
pi_result hip_piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary)
Definition: pi_hip.cpp:906
PI_DEVICE_INFO_VERSION
@ PI_DEVICE_INFO_VERSION
Definition: pi.h:241
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
Definition: pi.h:268
PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
Definition: pi.h:449
piextUSMEnqueuePrefetch
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_esimd_emulator.cpp:1958
PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
Definition: pi.h:451
pi_queue_properties
pi_bitfield pi_queue_properties
Definition: pi.h:559
PI_DEVICE_INFO