Implementation of a PI Kernel for CUDA. More...
#include <cuda/pi_cuda.hpp>
Classes | |
struct | ArgumentInfo |
struct | arguments |
Structure that holds the arguments to the kernel. More... | |
struct | Hash |
Public Types | |
using | native_type = CUfunction |
using | native_type = hipFunction_t |
Public Member Functions | |
_pi_kernel (CUfunction func, CUfunction funcWithOffsetParam, const char *name, pi_program program, pi_context ctxt) | |
~_pi_kernel () | |
pi_program | get_program () const noexcept |
pi_uint32 | increment_reference_count () noexcept |
pi_uint32 | decrement_reference_count () noexcept |
pi_uint32 | get_reference_count () const noexcept |
native_type | get () const noexcept |
native_type | get_with_offset_parameter () const noexcept |
bool | has_with_offset_parameter () const noexcept |
pi_context | get_context () const noexcept |
const char * | get_name () const noexcept |
pi_uint32 | get_num_args () const noexcept |
Returns the number of arguments, excluding the implicit global offset. More... | |
void | set_kernel_arg (int index, size_t size, const void *arg) |
void | set_kernel_local_arg (int index, size_t size) |
void | set_implicit_offset_arg (size_t size, std::uint32_t *implicitOffset) |
const arguments::args_index_t & | get_arg_indices () const |
pi_uint32 | get_local_size () const noexcept |
void | clear_local_size () |
_pi_kernel () | |
_pi_kernel (hipFunction_t func, hipFunction_t funcWithOffsetParam, const char *name, pi_program program, pi_context ctxt) | |
_pi_kernel (hipFunction_t func, const char *name, pi_program program, pi_context ctxt) | |
~_pi_kernel () | |
pi_program | get_program () const noexcept |
pi_uint32 | increment_reference_count () noexcept |
pi_uint32 | decrement_reference_count () noexcept |
pi_uint32 | get_reference_count () const noexcept |
native_type | get () const noexcept |
native_type | get_with_offset_parameter () const noexcept |
bool | has_with_offset_parameter () const noexcept |
pi_context | get_context () const noexcept |
const char * | get_name () const noexcept |
pi_uint32 | get_num_args () const noexcept |
Returns the number of arguments, excluding the implicit global offset. More... | |
void | set_kernel_arg (int index, size_t size, const void *arg) |
void | set_kernel_local_arg (int index, size_t size) |
void | set_implicit_offset_arg (size_t size, std::uint32_t *implicitOffset) |
arguments::args_index_t | get_arg_indices () const |
pi_uint32 | get_local_size () const noexcept |
void | clear_local_size () |
_pi_kernel (ze_kernel_handle_t Kernel, bool OwnZeKernel, pi_program Program) | |
pi_result | initialize () |
bool | hasIndirectAccess () |
Public Attributes | |
native_type | function_ |
native_type | functionWithOffsetParam_ |
std::string | name_ |
pi_context | context_ |
pi_program | program_ |
std::atomic_uint32_t | refCount_ |
size_t | reqdThreadsPerBlock_ [REQD_THREADS_PER_BLOCK_DIMENSIONS] |
struct _pi_kernel::arguments | args_ |
ze_kernel_handle_t | ZeKernel |
bool | OwnZeKernel |
pi_program | Program |
std::unordered_set< std::pair< void *const, MemAllocRecord > *, Hash > | MemAllocs |
std::atomic< pi_uint32 > | SubmissionsCount |
std::vector< ArgumentInfo > | PendingArguments |
ZeCache< ZeStruct< ze_kernel_properties_t > > | ZeKernelProperties |
ZeCache< std::string > | ZeKernelName |
Static Public Attributes | |
static constexpr pi_uint32 | REQD_THREADS_PER_BLOCK_DIMENSIONS = 3u |
Implementation of a PI Kernel for CUDA.
Implementation of a PI Kernel for HIP.
PI Kernels are used to set kernel arguments, creating state on the Kernel object for a given invocation. This is not the case for CUfunction objects, which are simply passed together with their arguments at invocation time. The PI Kernel implementation for CUDA stores the list of arguments, argument sizes and offsets to emulate the PI Kernel interface, saving the arguments for later dispatch. Note that in the PI API, local memory is specified as a size per individual argument, whereas in CUDA only the total amount of shared memory is required, since it is not passed as a parameter. A compiler pass converts the PI API local memory model into the CUDA shared memory model. This object simply calculates the total amount of shared memory and the initial offset of each parameter.
PI Kernels are used to set kernel arguments, creating state on the Kernel object for a given invocation. This is not the case for hipFunction_t objects, which are simply passed together with their arguments at invocation time. The PI Kernel implementation for HIP stores the list of arguments, argument sizes and offsets to emulate the PI Kernel interface, saving the arguments for later dispatch. Note that in the PI API, local memory is specified as a size per individual argument, whereas in HIP only the total amount of shared memory is required, since it is not passed as a parameter. A compiler pass converts the PI API local memory model into the HIP shared memory model. This object simply calculates the total amount of shared memory and the initial offset of each parameter.
Definition at line 773 of file pi_cuda.hpp.
using _pi_kernel::native_type = hipFunction_t |
Definition at line 563 of file pi_hip.hpp.
using _pi_kernel::native_type = CUfunction |
Definition at line 774 of file pi_cuda.hpp.
|
inline |
Note: this code assumes that there is only one device per context.
Definition at line 871 of file pi_cuda.hpp.
|
inline |
Definition at line 885 of file pi_cuda.hpp.
References context_, cuda_piContextRelease(), cuda_piProgramRelease(), and program_.
|
inline |
Definition at line 217 of file pi_esimd_emulator.hpp.
|
inline |
Definition at line 657 of file pi_hip.hpp.
|
inline |
Definition at line 665 of file pi_hip.hpp.
|
inline |
Definition at line 669 of file pi_hip.hpp.
References context_, hip_piContextRelease(), hip_piProgramRelease(), and program_.
|
inline |
Definition at line 1445 of file pi_level_zero.hpp.
|
inline |
Definition at line 720 of file pi_hip.hpp.
References args_, and _pi_kernel::arguments::clear_local_size().
|
inline |
Definition at line 936 of file pi_cuda.hpp.
References args_, and _pi_kernel::arguments::clear_local_size().
|
inline noexcept |
Definition at line 678 of file pi_hip.hpp.
References refCount_.
|
inline noexcept |
Definition at line 894 of file pi_cuda.hpp.
References refCount_.
|
inline noexcept |
Definition at line 682 of file pi_hip.hpp.
References function_.
|
inline noexcept |
Definition at line 898 of file pi_cuda.hpp.
References function_.
|
inline |
Definition at line 714 of file pi_hip.hpp.
References args_, and _pi_kernel::arguments::get_indices().
|
inline |
Definition at line 930 of file pi_cuda.hpp.
References args_, and _pi_kernel::arguments::get_indices().
|
inline noexcept |
Definition at line 692 of file pi_hip.hpp.
References context_.
|
inline noexcept |
Definition at line 908 of file pi_cuda.hpp.
References context_.
|
inline noexcept |
Definition at line 718 of file pi_hip.hpp.
References args_, and _pi_kernel::arguments::get_local_size().
|
inline noexcept |
Definition at line 934 of file pi_cuda.hpp.
References args_, and _pi_kernel::arguments::get_local_size().
|
inline noexcept |
Definition at line 694 of file pi_hip.hpp.
References name_.
|
inline noexcept |
Definition at line 910 of file pi_cuda.hpp.
References name_.
|
inline noexcept |
Returns the number of arguments, excluding the implicit global offset.
Note that this returns only the number of arguments currently known, not the number actually required by the kernel, since that cannot be queried from the HIP Driver API.
Definition at line 700 of file pi_hip.hpp.
References args_, and _pi_kernel::arguments::indices_.
|
inline noexcept |
Returns the number of arguments, excluding the implicit global offset.
Note that this returns only the number of arguments currently known, not the number actually required by the kernel, since that cannot be queried from the CUDA Driver API.
Definition at line 916 of file pi_cuda.hpp.
References args_, and _pi_kernel::arguments::indices_.
|
inline noexcept |
Definition at line 674 of file pi_hip.hpp.
References program_.
|
inline noexcept |
Definition at line 890 of file pi_cuda.hpp.
References program_.
|
inline noexcept |
Definition at line 680 of file pi_hip.hpp.
References refCount_.
|
inline noexcept |
Definition at line 896 of file pi_cuda.hpp.
References refCount_.
|
inline noexcept |
Definition at line 684 of file pi_hip.hpp.
References functionWithOffsetParam_.
|
inline noexcept |
Definition at line 900 of file pi_cuda.hpp.
References functionWithOffsetParam_.
|
inline noexcept |
Definition at line 688 of file pi_hip.hpp.
References functionWithOffsetParam_.
|
inline noexcept |
Definition at line 904 of file pi_cuda.hpp.
References functionWithOffsetParam_.
|
inline |
Definition at line 1453 of file pi_level_zero.hpp.
|
inline noexcept |
Definition at line 676 of file pi_hip.hpp.
References refCount_.
|
inline noexcept |
Definition at line 892 of file pi_cuda.hpp.
References refCount_.
pi_result _pi_kernel::initialize | ( | ) |
Definition at line 4780 of file pi_level_zero.cpp.
References _pi_program::Context, PI_CALL, piContextRetain(), piProgramRetain(), _pi_queue::Properties, and ZE_CALL_NOCHECK.
|
inline |
Definition at line 710 of file pi_hip.hpp.
References args_, and _pi_kernel::arguments::set_implicit_offset().
|
inline |
Definition at line 926 of file pi_cuda.hpp.
References args_, and _pi_kernel::arguments::set_implicit_offset().
|
inline |
Definition at line 702 of file pi_hip.hpp.
References _pi_kernel::arguments::add_arg(), and args_.
|
inline |
Definition at line 918 of file pi_cuda.hpp.
References _pi_kernel::arguments::add_arg(), and args_.
|
inline |
Definition at line 706 of file pi_hip.hpp.
References _pi_kernel::arguments::add_local_arg(), and args_.
|
inline |
Definition at line 922 of file pi_cuda.hpp.
References _pi_kernel::arguments::add_local_arg(), and args_.
struct _pi_kernel::arguments _pi_kernel::args_ |
pi_context _pi_kernel::context_ |
Definition at line 779 of file pi_cuda.hpp.
Referenced by get_context(), and ~_pi_kernel().
native_type _pi_kernel::function_ |
Definition at line 776 of file pi_cuda.hpp.
Referenced by get().
native_type _pi_kernel::functionWithOffsetParam_ |
Definition at line 777 of file pi_cuda.hpp.
Referenced by get_with_offset_parameter(), and has_with_offset_parameter().
std::unordered_set<std::pair<void *const, MemAllocRecord> *, Hash> _pi_kernel::MemAllocs |
Definition at line 1491 of file pi_level_zero.hpp.
Referenced by piKernelRelease().
std::string _pi_kernel::name_ |
Definition at line 778 of file pi_cuda.hpp.
Referenced by get_name().
bool _pi_kernel::OwnZeKernel |
Definition at line 1464 of file pi_level_zero.hpp.
Referenced by piKernelRelease().
std::vector<ArgumentInfo> _pi_kernel::PendingArguments |
Definition at line 1514 of file pi_level_zero.hpp.
Referenced by piEnqueueKernelLaunch(), and piextKernelSetArgMemObj().
pi_program _pi_kernel::Program |
Definition at line 1467 of file pi_level_zero.hpp.
Referenced by piEnqueueKernelLaunch(), piKernelGetInfo(), and piKernelRelease().
pi_program _pi_kernel::program_ |
Definition at line 780 of file pi_cuda.hpp.
Referenced by get_program(), and ~_pi_kernel().
std::atomic_uint32_t _pi_kernel::refCount_ |
Definition at line 781 of file pi_cuda.hpp.
Referenced by decrement_reference_count(), get_reference_count(), and increment_reference_count().
|
static constexpr |
Definition at line 783 of file pi_cuda.hpp.
size_t _pi_kernel::reqdThreadsPerBlock_[REQD_THREADS_PER_BLOCK_DIMENSIONS] |
Definition at line 784 of file pi_cuda.hpp.
std::atomic<pi_uint32> _pi_kernel::SubmissionsCount |
Definition at line 1502 of file pi_level_zero.hpp.
Referenced by piKernelRelease().
ze_kernel_handle_t _pi_kernel::ZeKernel |
Definition at line 1460 of file pi_level_zero.hpp.
Referenced by piEnqueueKernelLaunch(), piextKernelGetNativeHandle(), piextKernelSetArgSampler(), piKernelGetGroupInfo(), piKernelGetInfo(), piKernelRelease(), piKernelSetArg(), and piKernelSetExecInfo().
ZeCache<std::string> _pi_kernel::ZeKernelName |
Definition at line 1518 of file pi_level_zero.hpp.
Referenced by piKernelGetInfo().
ZeCache<ZeStruct<ze_kernel_properties_t> > _pi_kernel::ZeKernelProperties |
Definition at line 1517 of file pi_level_zero.hpp.
Referenced by piKernelGetGroupInfo(), piKernelGetInfo(), and piKernelGetSubGroupInfo().