DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_hip.hpp
Go to the documentation of this file.
1 //===-- pi_hip.hpp - HIP Plugin -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 
17 
18 #ifndef PI_HIP_HPP
19 #define PI_HIP_HPP
20 
21 // This version should be incremented for any change made to this file or its
22 // corresponding .cpp file.
23 #define _PI_HIP_PLUGIN_VERSION 1
24 
25 #define _PI_HIP_PLUGIN_VERSION_STRING \
26  _PI_PLUGIN_VERSION_STRING(_PI_HIP_PLUGIN_VERSION)
27 
28 #include "CL/sycl/detail/pi.h"
29 #include <array>
30 #include <atomic>
31 #include <cassert>
32 #include <cstring>
33 #include <functional>
34 #include <hip/hip_runtime.h>
35 #include <limits>
36 #include <mutex>
37 #include <numeric>
38 #include <stdint.h>
39 #include <string>
40 #include <vector>
41 
42 extern "C" {
43 
58 }
59 
65 struct _pi_platform {
66  std::vector<std::unique_ptr<_pi_device>> devices_;
67 };
68 
74 struct _pi_device {
75 private:
76  using native_type = hipDevice_t;
77 
78  native_type cuDevice_;
79  std::atomic_uint32_t refCount_;
80  pi_platform platform_;
81 
82 public:
83  _pi_device(native_type cuDevice, pi_platform platform)
84  : cuDevice_(cuDevice), refCount_{1}, platform_(platform) {}
85 
86  native_type get() const noexcept { return cuDevice_; };
87 
88  pi_uint32 get_reference_count() const noexcept { return refCount_; }
89 
90  pi_platform get_platform() const noexcept { return platform_; };
91 };
92 
131 struct _pi_context {
132 
133  struct deleter_data {
135  void *user_data;
136 
137  void operator()() { function(user_data); }
138  };
139 
140  using native_type = hipCtx_t;
141 
142  enum class kind { primary, user_defined } kind_;
145  std::atomic_uint32_t refCount_;
146 
147  hipEvent_t evBase_; // HIP event used as base counter
148 
149  _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId)
150  : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1},
151  evBase_(nullptr) {
153  };
154 
156 
158  std::lock_guard<std::mutex> guard(mutex_);
159  for (auto &deleter : extended_deleters_) {
160  deleter();
161  }
162  }
163 
165  void *user_data) {
166  std::lock_guard<std::mutex> guard(mutex_);
167  extended_deleters_.emplace_back(deleter_data{function, user_data});
168  }
169 
170  pi_device get_device() const noexcept { return deviceId_; }
171 
172  native_type get() const noexcept { return hipContext_; }
173 
174  bool is_primary() const noexcept { return kind_ == kind::primary; }
175 
177 
179 
180  pi_uint32 get_reference_count() const noexcept { return refCount_; }
181 
182 private:
183  std::mutex mutex_;
184  std::vector<deleter_data> extended_deleters_;
185 };
186 
191 struct _pi_mem {
192 
193  // TODO: Move as much shared data up as possible
195 
196  // Context where the memory object is accessibles
198 
200  std::atomic_uint32_t refCount_;
201  enum class mem_type { buffer, surface } mem_type_;
202 
208  union mem_ {
209  // Handler for plain, pointer-based HIP allocations
210  struct buffer_mem_ {
211  using native_type = hipDeviceptr_t;
212 
213  // If this allocation is a sub-buffer (i.e., a view on an existing
214  // allocation), this is the pointer to the parent handler structure
215  pi_mem parent_;
216  // HIP handler for the pointer
218 
220  void *hostPtr_;
222  size_t size_;
224  size_t mapOffset_;
226  void *mapPtr_;
229 
237  enum class alloc_mode {
238  classic,
239  use_host_ptr,
240  copy_in,
242  } allocMode_;
243 
244  native_type get() const noexcept { return ptr_; }
245 
246  native_type get_with_offset(size_t offset) const noexcept {
247  return reinterpret_cast<native_type>(reinterpret_cast<uint8_t *>(ptr_) +
248  offset);
249  }
250 
251  void *get_void() const noexcept { return reinterpret_cast<void *>(ptr_); }
252 
253  size_t get_size() const noexcept { return size_; }
254 
255  void *get_map_ptr() const noexcept { return mapPtr_; }
256 
257  size_t get_map_offset(void *ptr) const noexcept {
258  (void)ptr;
259  return mapOffset_;
260  }
261 
266  void *map_to_ptr(size_t offset, pi_map_flags flags) noexcept {
267  assert(mapPtr_ == nullptr);
268  mapOffset_ = offset;
269  mapFlags_ = flags;
270  if (hostPtr_) {
271  mapPtr_ = static_cast<char *>(hostPtr_) + offset;
272  } else {
273  // TODO: Allocate only what is needed based on the offset
274  mapPtr_ = static_cast<void *>(malloc(this->get_size()));
275  }
276  return mapPtr_;
277  }
278 
280  void unmap(void *ptr) noexcept {
281  (void)ptr;
282  assert(mapPtr_ != nullptr);
283 
284  if (mapPtr_ != hostPtr_) {
285  free(mapPtr_);
286  }
287  mapPtr_ = nullptr;
288  mapOffset_ = 0;
289  }
290 
291  pi_map_flags get_map_flags() const noexcept {
292  assert(mapPtr_ != nullptr);
293  return mapFlags_;
294  }
295  } buffer_mem_;
296 
297  // Handler data for surface object (i.e. Images)
298  struct surface_mem_ {
299  hipArray *array_;
300  hipSurfaceObject_t surfObj_;
302 
303  hipArray *get_array() const noexcept { return array_; }
304 
305  hipSurfaceObject_t get_surface() const noexcept { return surfObj_; }
306 
307  pi_mem_type get_image_type() const noexcept { return imageType_; }
308  } surface_mem_;
309  } mem_;
310 
313  hipDeviceptr_t ptr, void *host_ptr, size_t size)
315  mem_.buffer_mem_.ptr_ = ptr;
316  mem_.buffer_mem_.parent_ = parent;
318  mem_.buffer_mem_.size_ = size;
320  mem_.buffer_mem_.mapPtr_ = nullptr;
323  if (is_sub_buffer()) {
325  } else {
327  }
328  };
329 
331  _pi_mem(pi_context ctxt, hipArray *array, hipSurfaceObject_t surf,
332  pi_mem_type image_type, void *host_ptr)
334  (void)host_ptr;
335  mem_.surface_mem_.array_ = array;
336  mem_.surface_mem_.imageType_ = image_type;
337  mem_.surface_mem_.surfObj_ = surf;
339  }
340 
342  if (mem_type_ == mem_type::buffer) {
343  if (is_sub_buffer()) {
345  return;
346  }
347  }
349  }
350 
351  // TODO: Move as many shared funcs up as possible
352  bool is_buffer() const noexcept { return mem_type_ == mem_type::buffer; }
353 
354  bool is_sub_buffer() const noexcept {
355  return (is_buffer() && (mem_.buffer_mem_.parent_ != nullptr));
356  }
357 
358  bool is_image() const noexcept { return mem_type_ == mem_type::surface; }
359 
360  pi_context get_context() const noexcept { return context_; }
361 
363 
365 
366  pi_uint32 get_reference_count() const noexcept { return refCount_; }
367 };
368 
371 struct _pi_queue {
372  using native_type = hipStream_t;
373 
378  std::atomic_uint32_t refCount_;
379  std::atomic_uint32_t eventCount_;
380 
382  pi_queue_properties properties)
383  : stream_{stream}, context_{context}, device_{device},
384  properties_{properties}, refCount_{1}, eventCount_{0} {
387  }
388 
392  }
393 
394  native_type get() const noexcept { return stream_; };
395 
396  _pi_context *get_context() const { return context_; };
397 
399 
401 
402  pi_uint32 get_reference_count() const noexcept { return refCount_; }
403 
404  pi_uint32 get_next_event_id() noexcept { return ++eventCount_; }
405 };
406 
407 typedef void (*pfn_notify)(pi_event event, pi_int32 eventCommandStatus,
408  void *userData);
411 struct _pi_event {
412 public:
413  using native_type = hipEvent_t;
414 
415  pi_result record();
416 
417  pi_result wait();
418 
419  pi_result start();
420 
421  native_type get() const noexcept { return evEnd_; };
422 
423  pi_queue get_queue() const noexcept { return queue_; }
424 
425  pi_command_type get_command_type() const noexcept { return commandType_; }
426 
427  pi_uint32 get_reference_count() const noexcept { return refCount_; }
428 
429  bool is_recorded() const noexcept { return isRecorded_; }
430 
431  bool is_started() const noexcept { return isStarted_; }
432 
433  bool is_completed() const noexcept;
434 
435  pi_int32 get_execution_status() const noexcept {
436 
437  if (!is_recorded()) {
438  return PI_EVENT_SUBMITTED;
439  }
440 
441  if (!is_completed()) {
442  return PI_EVENT_RUNNING;
443  }
444  return PI_EVENT_COMPLETE;
445  }
446 
447  pi_context get_context() const noexcept { return context_; };
448 
449  pi_uint32 increment_reference_count() { return ++refCount_; }
450 
451  pi_uint32 decrement_reference_count() { return --refCount_; }
452 
453  pi_uint32 get_event_id() const noexcept { return eventId_; }
454 
455  // Returns the counter time when the associated command(s) were enqueued
456  //
457  pi_uint64 get_queued_time() const;
458 
459  // Returns the counter time when the associated command(s) started execution
460  //
461  pi_uint64 get_start_time() const;
462 
463  // Returns the counter time when the associated command(s) completed
464  //
465  pi_uint64 get_end_time() const;
466 
467  // construct a native HIP. This maps closely to the underlying HIP event.
469  return new _pi_event(type, queue->get_context(), queue);
470  }
471 
472  pi_result release();
473 
474  ~_pi_event();
475 
476 private:
477  // This constructor is private to force programmers to use the make_native /
478  // make_user static members in order to create a pi_event for HIP.
480 
481  pi_command_type commandType_; // The type of command associated with event.
482 
483  std::atomic_uint32_t refCount_; // Event reference count.
484 
485  bool isCompleted_; // Signifies whether the operations have completed
486  //
487 
488  bool isRecorded_; // Signifies wether a native HIP event has been recorded
489  // yet.
490  bool isStarted_; // Signifies wether the operation associated with the
491  // PI event has started or not
492  //
493 
494  pi_uint32 eventId_; // Queue identifier of the event.
495 
496  native_type evEnd_; // HIP event handle. If this _pi_event represents a user
497  // event, this will be nullptr.
498 
499  native_type evStart_; // HIP event handle associated with the start
500 
501  native_type evQueued_; // HIP event handle associated with the time
502  // the command was enqueued
503 
504  pi_queue queue_; // pi_queue associated with the event. If this is a user
505  // event, this will be nullptr.
506 
507  pi_context context_; // pi_context associated with the event. If this is a
508  // native event, this will be the same context associated
509  // with the queue_ member.
510 };
511 
514 struct _pi_program {
515  using native_type = hipModule_t;
517  const char *binary_;
518  size_t binarySizeInBytes_;
519  std::atomic_uint32_t refCount_;
521 
522  constexpr static size_t MAX_LOG_SIZE = 8192u;
523 
525  std::string buildOptions_;
527 
528  _pi_program(pi_context ctxt);
529  ~_pi_program();
530 
531  pi_result set_binary(const char *binary, size_t binarySizeInBytes);
532 
533  pi_result build_program(const char *build_options);
534 
535  pi_context get_context() const { return context_; };
536 
537  native_type get() const noexcept { return module_; };
538 
540 
542 
543  pi_uint32 get_reference_count() const noexcept { return refCount_; }
544 };
545 
562 struct _pi_kernel {
563  using native_type = hipFunction_t;
564 
567  std::string name_;
570  std::atomic_uint32_t refCount_;
571 
579  struct arguments {
580  static constexpr size_t MAX_PARAM_BYTES = 4000u;
581  using args_t = std::array<char, MAX_PARAM_BYTES>;
582  using args_size_t = std::vector<size_t>;
583  using args_index_t = std::vector<void *>;
588 
589  std::uint32_t implicitOffsetArgs_[3] = {0, 0, 0};
590 
592  // Place the implicit offset index at the end of the indicies collection
593  indices_.emplace_back(&implicitOffsetArgs_);
594  }
595 
601  void add_arg(size_t index, size_t size, const void *arg,
602  size_t localSize = 0) {
603  if (index + 2 > indices_.size()) {
604  // Move implicit offset argument index with the end
605  indices_.resize(index + 2, indices_.back());
606  // Ensure enough space for the new argument
607  paramSizes_.resize(index + 1);
608  offsetPerIndex_.resize(index + 1);
609  }
610  paramSizes_[index] = size;
611  // calculate the insertion point on the array
612  size_t insertPos = std::accumulate(std::begin(paramSizes_),
613  std::begin(paramSizes_) + index, 0);
614  // Update the stored value for the argument
615  std::memcpy(&storage_[insertPos], arg, size);
616  indices_[index] = &storage_[insertPos];
617  offsetPerIndex_[index] = localSize;
618  }
619 
620  void add_local_arg(size_t index, size_t size) {
621  size_t localOffset = this->get_local_size();
622 
623  // maximum required alignment is the size of the largest vector type
624  const size_t max_alignment = sizeof(double) * 16;
625 
626  // for arguments smaller than the maximum alignment simply align to the
627  // size of the argument
628  const size_t alignment = std::min(max_alignment, size);
629 
630  // align the argument
631  size_t alignedLocalOffset = localOffset;
632  if (localOffset % alignment != 0) {
633  alignedLocalOffset += alignment - (localOffset % alignment);
634  }
635 
636  add_arg(index, sizeof(size_t), (const void *)&(alignedLocalOffset),
637  size + (alignedLocalOffset - localOffset));
638  }
639 
640  void set_implicit_offset(size_t size, std::uint32_t *implicitOffset) {
641  assert(size == sizeof(std::uint32_t) * 3);
642  std::memcpy(implicitOffsetArgs_, implicitOffset, size);
643  }
644 
646  std::fill(std::begin(offsetPerIndex_), std::end(offsetPerIndex_), 0);
647  }
648 
649  args_index_t get_indices() const noexcept { return indices_; }
650 
652  return std::accumulate(std::begin(offsetPerIndex_),
653  std::end(offsetPerIndex_), 0);
654  }
655  } args_;
656 
657  _pi_kernel(hipFunction_t func, hipFunction_t funcWithOffsetParam,
658  const char *name, pi_program program, pi_context ctxt)
659  : function_{func}, functionWithOffsetParam_{funcWithOffsetParam},
660  name_{name}, context_{ctxt}, program_{program}, refCount_{1} {
663  }
664 
665  _pi_kernel(hipFunction_t func, const char *name, pi_program program,
666  pi_context ctxt)
667  : _pi_kernel{func, nullptr, name, program, ctxt} {}
668 
672  }
673 
674  pi_program get_program() const noexcept { return program_; }
675 
677 
679 
680  pi_uint32 get_reference_count() const noexcept { return refCount_; }
681 
682  native_type get() const noexcept { return function_; };
683 
686  };
687 
688  bool has_with_offset_parameter() const noexcept {
689  return functionWithOffsetParam_ != nullptr;
690  }
691 
692  pi_context get_context() const noexcept { return context_; };
693 
694  const char *get_name() const noexcept { return name_.c_str(); }
695 
700  pi_uint32 get_num_args() const noexcept { return args_.indices_.size() - 1; }
701 
702  void set_kernel_arg(int index, size_t size, const void *arg) {
703  args_.add_arg(index, size, arg);
704  }
705 
706  void set_kernel_local_arg(int index, size_t size) {
707  args_.add_local_arg(index, size);
708  }
709 
710  void set_implicit_offset_arg(size_t size, std::uint32_t *implicitOffset) {
711  args_.set_implicit_offset(size, implicitOffset);
712  }
713 
715  return args_.get_indices();
716  }
717 
718  pi_uint32 get_local_size() const noexcept { return args_.get_local_size(); }
719 
721 };
722 
728 struct _pi_sampler {
729  std::atomic_uint32_t refCount_;
732 
734  : refCount_(1), props_(0), context_(context) {}
735 
737 
739 
740  pi_uint32 get_reference_count() const noexcept { return refCount_; }
741 };
742 
743 // -------------------------------------------------------------
744 // Helper types and functions
745 //
746 
747 #endif // PI_HIP_HPP
_pi_sampler::context_
pi_context context_
Definition: pi_cuda.hpp:947
_pi_kernel::has_with_offset_parameter
bool has_with_offset_parameter() const noexcept
Definition: pi_hip.hpp:688
_pi_kernel::arguments::MAX_PARAM_BYTES
static constexpr size_t MAX_PARAM_BYTES
Definition: pi_cuda.hpp:794
_pi_mem::_pi_mem
_pi_mem(pi_context ctxt, pi_mem parent, mem_::buffer_mem_::alloc_mode mode, hipDeviceptr_t ptr, void *host_ptr, size_t size)
Constructs the PI MEM handler for a non-typed allocation ("buffer")
Definition: pi_hip.hpp:312
_pi_kernel::native_type
CUfunction native_type
Definition: pi_cuda.hpp:774
_pi_event::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:427
_pi_mem::mem_
A PI Memory object represents either plain memory allocations ("Buffers" in OpenCL) or typed allocati...
Definition: pi_cuda.hpp:238
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:221
PI_MAP_WRITE
constexpr pi_map_flags PI_MAP_WRITE
Definition: pi.h:536
_pi_mem_type
_pi_mem_type
Definition: pi.h:395
_pi_context::hipContext_
native_type hipContext_
Definition: pi_hip.hpp:143
_pi_kernel::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:678
_pi_device::_pi_device
_pi_device(native_type cuDevice, pi_platform platform)
Definition: pi_hip.hpp:83
_pi_kernel::arguments::get_local_size
pi_uint32 get_local_size() const
Definition: pi_cuda.hpp:865
_pi_context::kind::primary
@ primary
pi.h
_pi_context::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:178
_pi_sampler::refCount_
std::atomic_uint32_t refCount_
Definition: pi_cuda.hpp:945
_pi_program::context_
_pi_context * context_
Definition: pi_cuda.hpp:724
_pi_event::get_context
pi_context get_context() const noexcept
Definition: pi_hip.hpp:447
_pi_mem::mem_::buffer_mem_::hostPtr_
void * hostPtr_
Pointer associated with this device on the host.
Definition: pi_cuda.hpp:250
_pi_context::deviceId_
_pi_device * deviceId_
Definition: pi_cuda.hpp:172
_pi_kernel::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:680
_pi_queue::context_
_pi_context * context_
Definition: pi_cuda.hpp:404
_pi_mem::_pi_mem
_pi_mem(pi_context ctxt, hipArray *array, hipSurfaceObject_t surf, pi_mem_type image_type, void *host_ptr)
Constructs the PI allocation for an Image object.
Definition: pi_hip.hpp:331
_pi_mem::is_image
bool is_image() const noexcept
Definition: pi_hip.hpp:358
_pi_kernel::arguments::args_index_t
std::vector< void * > args_index_t
Definition: pi_cuda.hpp:797
_pi_event::is_started
bool is_started() const noexcept
Definition: pi_hip.hpp:431
_pi_sampler::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:736
hip_piContextRelease
pi_result hip_piContextRelease(pi_context ctxt)
Definition: pi_hip.cpp:1825
hip_piContextRetain
pi_result hip_piContextRetain(pi_context context)
Definition: pi_hip.cpp:875
_pi_device::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:88
_pi_kernel::_pi_kernel
_pi_kernel(hipFunction_t func, const char *name, pi_program program, pi_context ctxt)
Definition: pi_hip.hpp:665
_pi_context::deleter_data
Definition: pi_cuda.hpp:161
_pi_sampler::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:738
_pi_kernel::arguments::implicitOffsetArgs_
std::uint32_t implicitOffsetArgs_[3]
Definition: pi_cuda.hpp:803
_pi_kernel::arguments::add_local_arg
void add_local_arg(size_t index, size_t size)
Definition: pi_hip.hpp:620
_pi_kernel::name_
std::string name_
Definition: pi_cuda.hpp:778
_pi_queue::native_type
CUstream native_type
Definition: pi_cuda.hpp:393
_pi_context::get
native_type get() const noexcept
Definition: pi_hip.hpp:172
_pi_mem::mem_::buffer_mem_::size_
size_t size_
Size of the allocation in bytes.
Definition: pi_cuda.hpp:252
_pi_program::get_context
pi_context get_context() const
Definition: pi_hip.hpp:535
_pi_program::MAX_LOG_SIZE
constexpr static size_t MAX_LOG_SIZE
Definition: pi_cuda.hpp:730
_pi_sampler::_pi_sampler
_pi_sampler(pi_context context)
Definition: pi_hip.hpp:733
_pi_device::get_platform
pi_platform get_platform() const noexcept
Definition: pi_hip.hpp:90
_pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr
@ alloc_host_ptr
_pi_mem::mem_::buffer_mem_::get_void
void * get_void() const noexcept
Definition: pi_hip.hpp:251
_pi_result
_pi_result
Definition: pi.h:105
_pi_kernel::~_pi_kernel
~_pi_kernel()
Definition: pi_hip.hpp:669
_pi_queue::device_
_pi_device * device_
Definition: pi_cuda.hpp:405
_pi_context::native_type
CUcontext native_type
Definition: pi_cuda.hpp:168
_pi_event::get_queued_time
pi_uint64 get_queued_time() const
Definition: pi_cuda.cpp:523
_pi_mem::context_
pi_context context_
Definition: pi_cuda.hpp:227
_pi_mem::is_buffer
bool is_buffer() const noexcept
Definition: pi_hip.hpp:352
_pi_mem::mem_::surface_mem_::array_
hipArray * array_
Definition: pi_hip.hpp:299
_pi_program::refCount_
std::atomic_uint32_t refCount_
Definition: pi_cuda.hpp:723
PI_EVENT_RUNNING
@ PI_EVENT_RUNNING
Definition: pi.h:115
_pi_platform
A PI platform stores all known PI devices, in the CUDA plugin this is just a vector of available devi...
Definition: pi_cuda.hpp:72
cl::sycl::host_ptr
multi_ptr< ElementType, access::address_space::ext_intel_global_host_space > host_ptr
Definition: pointers.hpp:33
_pi_kernel::arguments::paramSizes_
args_size_t paramSizes_
Definition: pi_cuda.hpp:799
_pi_context::kind
kind
Definition: pi_cuda.hpp:170
_pi_context::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:180
cl::sycl::malloc
void * malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind, const detail::code_location CodeLoc=detail::code_location::current())
Definition: usm_impl.cpp:410
hip_piProgramRetain
pi_result hip_piProgramRetain(pi_program program)
Definition: pi_hip.cpp:3135
_pi_mem::mem_::buffer_mem_::mapOffset_
size_t mapOffset_
Offset of the active mapped region.
Definition: pi_cuda.hpp:254
_pi_platform::devices_
std::vector< std::unique_ptr< _pi_device > > devices_
Definition: pi_cuda.hpp:74
_pi_program::build_program
pi_result build_program(const char *build_options)
Definition: pi_cuda.cpp:675
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:773
_pi_sampler::props_
pi_uint32 props_
Definition: pi_cuda.hpp:946
cl::sycl::info::event
event
Definition: info_desc.hpp:289
_pi_queue::eventCount_
std::atomic_uint32_t eventCount_
Definition: pi_cuda.hpp:408
cl::sycl::info::queue
queue
Definition: info_desc.hpp:229
hip_piQueueRetain
pi_result hip_piQueueRetain(pi_queue command_queue)
Definition: pi_hip.cpp:2252
_pi_mem::mem_::buffer_mem_::alloc_mode
alloc_mode
alloc_mode classic: Just a normal buffer allocated on the device via cuda malloc use_host_ptr: Use an...
Definition: pi_cuda.hpp:267
_pi_context::set_extended_deleter
void set_extended_deleter(pi_context_extended_deleter function, void *user_data)
Definition: pi_hip.hpp:164
_pi_mem::mem_::surface_mem_::get_surface
hipSurfaceObject_t get_surface() const noexcept
Definition: pi_hip.hpp:305
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
_pi_mem::~_pi_mem
~_pi_mem()
Definition: pi_hip.hpp:341
_pi_queue::get
native_type get() const noexcept
Definition: pi_hip.hpp:394
_pi_queue::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:398
_pi_kernel::arguments::get_indices
args_index_t get_indices() const noexcept
Definition: pi_hip.hpp:649
_pi_program::buildOptions_
std::string buildOptions_
Definition: pi_cuda.hpp:733
_pi_context::_pi_context
_pi_context(kind k, hipCtx_t ctxt, _pi_device *devId)
Definition: pi_hip.hpp:149
_pi_mem::mem_type_
enum _pi_mem::mem_type mem_type_
_pi_queue::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:402
_pi_queue::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:400
_pi_kernel::arguments::args_t
std::array< char, MAX_PARAM_BYTES > args_t
Definition: pi_cuda.hpp:795
_pi_program::buildStatus_
pi_program_build_status buildStatus_
Definition: pi_cuda.hpp:734
_pi_kernel::functionWithOffsetParam_
native_type functionWithOffsetParam_
Definition: pi_cuda.hpp:777
_pi_program::module_
native_type module_
Definition: pi_cuda.hpp:720
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:392
_pi_kernel::_pi_kernel
_pi_kernel(hipFunction_t func, hipFunction_t funcWithOffsetParam, const char *name, pi_program program, pi_context ctxt)
Definition: pi_hip.hpp:657
_pi_context::~_pi_context
~_pi_context()
Definition: pi_hip.hpp:155
_pi_kernel::get_num_args
pi_uint32 get_num_args() const noexcept
Returns the number of arguments, excluding the implicit global offset.
Definition: pi_hip.hpp:700
_pi_queue::get_next_event_id
pi_uint32 get_next_event_id() noexcept
Definition: pi_hip.hpp:404
pi_uint32
uint32_t pi_uint32
Definition: pi.h:94
_pi_mem::mem_::buffer_mem_::ptr_
native_type ptr_
Definition: pi_cuda.hpp:247
_pi_mem::mem_::buffer_mem_::get_map_flags
pi_map_flags get_map_flags() const noexcept
Definition: pi_hip.hpp:291
_pi_kernel::arguments::offsetPerIndex_
args_size_t offsetPerIndex_
Definition: pi_cuda.hpp:801
_pi_mem::mem_::surface_mem_
struct _pi_mem::mem_::surface_mem_ surface_mem_
_pi_mem::mem_::buffer_mem_::get_with_offset
native_type get_with_offset(size_t offset) const noexcept
Definition: pi_hip.hpp:246
pfn_notify
void(* pfn_notify)(pi_event event, pi_int32 eventCommandStatus, void *userData)
Definition: pi_hip.hpp:407
_pi_event::decrement_reference_count
pi_uint32 decrement_reference_count()
Definition: pi_hip.hpp:451
_pi_kernel::function_
native_type function_
Definition: pi_cuda.hpp:776
_pi_queue::properties_
pi_queue_properties properties_
Definition: pi_cuda.hpp:406
_pi_program::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:543
_pi_kernel::arguments::storage_
args_t storage_
Definition: pi_cuda.hpp:798
_pi_mem::mem_::surface_mem_::surfObj_
hipSurfaceObject_t surfObj_
Definition: pi_hip.hpp:300
_pi_kernel::arguments::add_arg
void add_arg(size_t index, size_t size, const void *arg, size_t localSize=0)
Adds an argument to the kernel.
Definition: pi_hip.hpp:601
_pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr
@ use_host_ptr
_pi_mem::mem_::buffer_mem_::get
native_type get() const noexcept
Definition: pi_hip.hpp:244
_pi_kernel::context_
pi_context context_
Definition: pi_cuda.hpp:779
pi_context_extended_deleter
void(* pi_context_extended_deleter)(void *user_data)
Definition: pi.h:1048
_pi_mem::mem_::surface_mem_::surfObj_
CUsurfObject surfObj_
Definition: pi_cuda.hpp:319
_pi_event::get_execution_status
pi_int32 get_execution_status() const noexcept
Definition: pi_cuda.hpp:618
_pi_context::deleter_data::user_data
void * user_data
Definition: pi_cuda.hpp:163
_pi_context::kind::user_defined
@ user_defined
_pi_kernel::get_program
pi_program get_program() const noexcept
Definition: pi_hip.hpp:674
_pi_mem::mem_type
mem_type
Definition: pi_cuda.hpp:231
_pi_event::get_event_id
pi_uint32 get_event_id() const noexcept
Definition: pi_hip.hpp:453
_pi_queue::~_pi_queue
~_pi_queue()
Definition: pi_hip.hpp:389
hip_piQueueRelease
pi_result hip_piQueueRelease(pi_queue command_queue)
Definition: pi_hip.cpp:2260
_pi_event::get_command_type
pi_command_type get_command_type() const noexcept
Definition: pi_hip.hpp:425
_pi_event::native_type
CUevent native_type
Definition: pi_cuda.hpp:592
_pi_mem::mem_::buffer_mem_::get_size
size_t get_size() const noexcept
Definition: pi_hip.hpp:253
_pi_kernel::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:676
_pi_mem::mem_::buffer_mem_::mapPtr_
void * mapPtr_
Pointer to the active mapped region, if any.
Definition: pi_cuda.hpp:256
pi_uint64
uint64_t pi_uint64
Definition: pi.h:95
_pi_mem::mem_::buffer_mem_::parent_
pi_mem parent_
Definition: pi_cuda.hpp:245
cl::sycl::info::platform::name
@ name
_pi_event::wait
pi_result wait()
Definition: pi_cuda.cpp:580
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:718
_pi_sampler
Implementation of samplers for CUDA.
Definition: pi_cuda.hpp:944
_pi_sampler::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:740
hip_piKernelRelease
pi_result hip_piKernelRelease(pi_kernel kernel)
Definition: pi_hip.cpp:3378
_pi_device::get
native_type get() const noexcept
Definition: pi_hip.hpp:86
_pi_kernel::get_context
pi_context get_context() const noexcept
Definition: pi_hip.hpp:692
_pi_queue::refCount_
std::atomic_uint32_t refCount_
Definition: pi_cuda.hpp:407
_pi_program::infoLog_
char infoLog_[MAX_LOG_SIZE]
Definition: pi_cuda.hpp:732
_pi_event::is_completed
bool is_completed() const noexcept
Definition: pi_cuda.cpp:506
hip_piDeviceRetain
pi_result hip_piDeviceRetain(pi_device device)
Definition: pi_hip.cpp:849
_pi_mem::get_reference_count
pi_uint32 get_reference_count() const noexcept
Definition: pi_hip.hpp:366
_pi_event::release
pi_result release()
Definition: pi_cuda.cpp:592
PI_EVENT_COMPLETE
@ PI_EVENT_COMPLETE
Definition: pi.h:114
_pi_mem::mem_type::buffer
@ buffer
_pi_kernel::get_local_size
pi_uint32 get_local_size() const noexcept
Definition: pi_hip.hpp:718
_pi_mem::mem_::buffer_mem_::native_type
CUdeviceptr native_type
Definition: pi_cuda.hpp:241
hip_piDeviceRelease
pi_result hip_piDeviceRelease(pi_device device)
Definition: pi_hip.cpp:964
_pi_context::evBase_
hipEvent_t evBase_
Definition: pi_hip.hpp:147
_pi_program::~_pi_program
~_pi_program()
Definition: pi_cuda.cpp:625
_pi_event::get
native_type get() const noexcept
Definition: pi_hip.hpp:421
cl::sycl::info::context
context
Definition: info_desc.hpp:42
_pi_kernel::arguments::set_implicit_offset
void set_implicit_offset(size_t size, std::uint32_t *implicitOffset)
Definition: pi_hip.hpp:640
_pi_mem::mem_::buffer_mem_::alloc_mode::copy_in
@ copy_in
_pi_program::binary_
const char * binary_
Definition: pi_cuda.hpp:721
_pi_mem::mem_::buffer_mem_::get_map_ptr
void * get_map_ptr() const noexcept
Definition: pi_hip.hpp:255
_pi_kernel::get_with_offset_parameter
native_type get_with_offset_parameter() const noexcept
Definition: pi_hip.hpp:684
_pi_context::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:176
_pi_event::~_pi_event
~_pi_event()
Definition: pi_cuda.cpp:481
_pi_event::increment_reference_count
pi_uint32 increment_reference_count()
Definition: pi_hip.hpp:449
_pi_context::deleter_data::operator()
void operator()()
Definition: pi_hip.hpp:137
hip_piProgramRelease
pi_result hip_piProgramRelease(pi_program program)
Decreases the reference count of a pi_program object.
Definition: pi_hip.cpp:3145
_pi_kernel::arguments::clear_local_size
void clear_local_size()
Definition: pi_hip.hpp:645
_pi_mem::mem_::surface_mem_::imageType_
pi_mem_type imageType_
Definition: pi_cuda.hpp:320
cl::sycl::detail::usm::free
void free(void *Ptr, const context &Ctxt, const code_location &CL)
Definition: usm_impl.cpp:220
_pi_event::_pi_event
_pi_event()
Definition: pi_esimd_emulator.hpp:201
pi_queue_properties
pi_bitfield pi_queue_properties
Definition: pi.h:559
_pi_queue::_pi_queue
_pi_queue(hipStream_t stream, _pi_context *context, _pi_device *device, pi_queue_properties properties)
Definition: pi_hip.hpp:381
_pi_kernel::program_
pi_program program_
Definition: pi_cuda.hpp:780
_pi_program::_pi_program
_pi_program()
Definition: pi_esimd_emulator.hpp:210
_pi_kernel::arguments
Structure that holds the arguments to the kernel.
Definition: pi_cuda.hpp:793
_pi_mem::mem_type::surface
@ surface
hip_piMemRetain
pi_result hip_piMemRetain(pi_mem mem)
Definition: pi_hip.cpp:2910
_pi_program::native_type
CUmodule native_type
Definition: pi_cuda.hpp:719
PI_PROGRAM_BUILD_STATUS_NONE
@ PI_PROGRAM_BUILD_STATUS_NONE
Definition: pi.h:136
_pi_event::get_start_time
pi_uint64 get_start_time() const
Definition: pi_cuda.cpp:532
_pi_event
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:590
_pi_program::errorLog_
char errorLog_[MAX_LOG_SIZE]
Definition: pi_cuda.hpp:732
_pi_context::refCount_
std::atomic_uint32_t refCount_
Definition: pi_cuda.hpp:173
_pi_mem::mem_::buffer_mem_
struct _pi_mem::mem_::buffer_mem_ buffer_mem_
_pi_queue::stream_
native_type stream_
Definition: pi_hip.hpp:374
_pi_mem::mem_::buffer_mem_::alloc_mode::classic
@ classic
_pi_mem::refCount_
std::atomic_uint32_t refCount_
Reference counting of the handler.
Definition: pi_cuda.hpp:230
_pi_mem::mem_
union _pi_mem::mem_ mem_
_pi_kernel::set_implicit_offset_arg
void set_implicit_offset_arg(size_t size, std::uint32_t *implicitOffset)
Definition: pi_hip.hpp:710
_pi_mem::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:362
_pi_kernel::get_name
const char * get_name() const noexcept
Definition: pi_hip.hpp:694
_pi_event::is_recorded
bool is_recorded() const noexcept
Definition: pi_hip.hpp:429
_pi_kernel::arguments::arguments
arguments()
Definition: pi_hip.hpp:591
_pi_program::set_binary
pi_result set_binary(const char *binary, size_t binarySizeInBytes)
Definition: pi_cuda.cpp:667
_pi_kernel::get
native_type get() const noexcept
Definition: pi_hip.hpp:682
_pi_kernel::get_arg_indices
arguments::args_index_t get_arg_indices() const
Definition: pi_hip.hpp:714
_pi_program_build_status
_pi_program_build_status
Definition: pi.h:135
_pi_kernel::refCount_
std::atomic_uint32_t refCount_
Definition: pi_cuda.hpp:781
cl::sycl::info::platform
platform
Definition: info_desc.hpp:31
hip_piMemRelease
pi_result hip_piMemRelease(pi_mem memObj)
Decreases the reference count of the Mem object.
Definition: pi_hip.cpp:1984
_pi_kernel::arguments::indices_
args_index_t indices_
Definition: pi_cuda.hpp:800
_pi_mem::mem_::buffer_mem_::unmap
void unmap(void *ptr) noexcept
Detach the allocation from the host memory.
Definition: pi_hip.hpp:280
_pi_kernel::args_
struct _pi_kernel::arguments args_
cl::sycl::access::mode
mode
Definition: access.hpp:28
_pi_kernel::set_kernel_local_arg
void set_kernel_local_arg(int index, size_t size)
Definition: pi_hip.hpp:706
_pi_event::start
pi_result start()
Definition: pi_cuda.cpp:488
cl::sycl::info::device
device
Definition: info_desc.hpp:53
_pi_mem::mem_::buffer_mem_::allocMode_
enum _pi_mem::mem_::buffer_mem_::alloc_mode allocMode_
_pi_mem::get_context
pi_context get_context() const noexcept
Definition: pi_hip.hpp:360
_pi_program::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:541
_pi_mem::is_sub_buffer
bool is_sub_buffer() const noexcept
Definition: pi_cuda.hpp:375
_pi_queue::get_context
_pi_context * get_context() const
Definition: pi_hip.hpp:396
_pi_context::is_primary
bool is_primary() const noexcept
Definition: pi_hip.hpp:174
_pi_mem::mem_::buffer_mem_::mapFlags_
pi_map_flags mapFlags_
Original flags for the mapped region.
Definition: pi_cuda.hpp:258
_pi_event::get_end_time
pi_uint64 get_end_time() const
Definition: pi_cuda.cpp:541
_pi_program::increment_reference_count
pi_uint32 increment_reference_count() noexcept
Definition: pi_hip.hpp:539
_pi_command_type
_pi_command_type
Definition: pi.h:363
_pi_kernel::set_kernel_arg
void set_kernel_arg(int index, size_t size, const void *arg)
Definition: pi_hip.hpp:702
_pi_kernel::arguments::args_size_t
std::vector< size_t > args_size_t
Definition: pi_cuda.hpp:796
pi_map_flags
pi_bitfield pi_map_flags
Definition: pi.h:534
_pi_program::get
native_type get() const noexcept
Definition: pi_hip.hpp:537
_pi_mem::mem_::surface_mem_::array_
CUarray array_
Definition: pi_cuda.hpp:318
_pi_mem::decrement_reference_count
pi_uint32 decrement_reference_count() noexcept
Definition: pi_hip.hpp:364
hip_piKernelRetain
pi_result hip_piKernelRetain(pi_kernel kernel)
Definition: pi_hip.cpp:3370
_pi_event::make_native
static pi_event make_native(pi_command_type type, pi_queue queue)
Definition: pi_hip.hpp:468
_pi_context::kind_
enum _pi_context::kind kind_
_pi_mem::mem_::surface_mem_::get_image_type
pi_mem_type get_image_type() const noexcept
Definition: pi_hip.hpp:307
_pi_event::get_queue
pi_queue get_queue() const noexcept
Definition: pi_hip.hpp:423
_pi_mem::mem_::buffer_mem_::get_map_offset
size_t get_map_offset(void *ptr) const noexcept
Definition: pi_hip.hpp:257
_pi_mem::mem_::surface_mem_::get_array
hipArray * get_array() const noexcept
Definition: pi_hip.hpp:303
PI_EVENT_SUBMITTED
@ PI_EVENT_SUBMITTED
Definition: pi.h:116
_pi_mem::mem_::buffer_mem_::map_to_ptr
void * map_to_ptr(size_t offset, pi_map_flags flags) noexcept
Returns a pointer to data visible on the host that contains the data on the device associated with th...
Definition: pi_hip.hpp:266
pi_int32
int32_t pi_int32
Definition: pi.h:93
_pi_context
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:159
_pi_program::binarySizeInBytes_
size_t binarySizeInBytes_
Definition: pi_cuda.hpp:722
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:82
_pi_event::record
pi_result record()
Definition: pi_cuda.cpp:550
_pi_context::get_device
pi_device get_device() const noexcept
Definition: pi_hip.hpp:170
_pi_context::invoke_extended_deleters
void invoke_extended_deleters()
Definition: pi_hip.hpp:157
_pi_kernel::arguments::get_indices
const args_index_t & get_indices() const noexcept
Definition: pi_cuda.hpp:863
_pi_kernel::clear_local_size
void clear_local_size()
Definition: pi_hip.hpp:720