DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_cuda.cpp File Reference

Implementation of CUDA Plugin. More...

#include <pi_cuda.hpp>
#include <sycl/detail/cuda_definitions.hpp>
#include <sycl/detail/defines.hpp>
#include <sycl/detail/pi.hpp>
#include <algorithm>
#include <cassert>
#include <cuda.h>
#include <cuda_device_runtime_api.h>
#include <limits>
#include <memory>
#include <mutex>
#include <regex>
Include dependency graph for pi_cuda.cpp:

Go to the source code of this file.

Classes

class  ReleaseGuard< T >
 RAII object that calls the reference count release function on the held PI object on destruction. More...
 

Namespaces

 sycl
 Error handling, matching OpenCL plugin semantics.
 
 sycl::_V1
 
 sycl::_V1::detail
 
 sycl::_V1::detail::pi
 

Macros

#define _PI_CL(pi_api, cuda_api)    (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);
 

Functions

void enableCUDATracing ()
 
void disableCUDATracing ()
 
void sycl::_V1::detail::pi::die (const char *Message)
 
void sycl::_V1::detail::pi::cuPrint (const char *Message)
 
void sycl::_V1::detail::pi::assertion (bool Condition, const char *Message=nullptr)
 
pi_result cuda_piEnqueueEventsWait (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all events. More...
 
pi_result cuda_piEnqueueEventsWaitWithBarrier (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue. More...
 
pi_result cuda_piEventRelease (pi_event event)
 
pi_result cuda_piEventRetain (pi_event event)
 
pi_result enqueueEventWait (pi_queue queue, pi_event event)
 
bool get_kernel_metadata (std::string metadataName, const char *tag, std::string &kernelName)
 
std::string getKernelNames (pi_program)
 Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this. More...
 
pi_result cuda_piDeviceGetInfo (pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piPlatformsGet (pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
 Obtains the CUDA platform. More...
 
pi_result cuda_piPlatformGetInfo (pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piDevicesGet (pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
 
pi_result cuda_piDeviceRetain (pi_device)
 
pi_result cuda_piContextGetInfo (pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piContextRetain (pi_context context)
 
pi_result cuda_piextContextSetExtendedDeleter (pi_context context, pi_context_extended_deleter function, void *user_data)
 
pi_result cuda_piDevicePartition (pi_device, const pi_device_partition_property *, pi_uint32, pi_device *, pi_uint32 *)
 Not applicable to CUDA, devices cannot be partitioned. More...
 
pi_result cuda_piextDeviceSelectBinary (pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary)
 
pi_result cuda_piextGetDeviceFunctionPointer (pi_device device, pi_program program, const char *func_name, pi_uint64 *func_pointer_ret)
 
pi_result cuda_piDeviceRelease (pi_device)
 
pi_result cuda_piextDeviceGetNativeHandle (pi_device device, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI device object. More...
 
pi_result cuda_piextDeviceCreateWithNativeHandle (pi_native_handle nativeHandle, pi_platform platform, pi_device *piDevice)
 Creates a PI device object from a CUDA device handle. More...
 
pi_result cuda_piContextCreate (const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data), void *user_data, pi_context *retcontext)
 Create a PI CUDA context. More...
 
pi_result cuda_piContextRelease (pi_context ctxt)
 
pi_result cuda_piextContextGetNativeHandle (pi_context context, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI context object. More...
 
pi_result cuda_piextContextCreateWithNativeHandle (pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
 Creates a PI context object from a CUDA context handle. More...
 
pi_result cuda_piMemBufferCreate (pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
 Creates a PI Memory object using a CUDA memory allocation. More...
 
pi_result cuda_piMemRelease (pi_mem memObj)
 Decreases the reference count of the Mem object. More...
 
pi_result cuda_piMemBufferPartition (pi_mem parent_buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *memObj)
 Implements a buffer partition in the CUDA backend. More...
 
pi_result cuda_piMemGetInfo (pi_mem, pi_mem_info, size_t, void *, size_t *)
 
pi_result cuda_piextMemGetNativeHandle (pi_mem mem, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI mem object. More...
 
pi_result cuda_piextMemCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *mem)
 Creates a PI mem object from a CUDA mem handle. More...
 
pi_result cuda_piQueueCreate (pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
 Creates a pi_queue object on the CUDA backend. More...
 
pi_result cuda_piQueueGetInfo (pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piQueueRetain (pi_queue command_queue)
 
pi_result cuda_piQueueRelease (pi_queue command_queue)
 
pi_result cuda_piQueueFinish (pi_queue command_queue)
 
pi_result cuda_piQueueFlush (pi_queue command_queue)
 
pi_result cuda_piextQueueGetNativeHandle (pi_queue queue, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI queue object. More...
 
pi_result cuda_piextQueueCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, pi_device device, bool ownNativeHandle, pi_queue *queue)
 Creates a PI queue object from a CUDA queue handle. More...
 
pi_result cuda_piEnqueueMemBufferWrite (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferRead (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEventsWait (pi_uint32 num_events, const pi_event *event_list)
 
pi_result cuda_piKernelCreate (pi_program program, const char *kernel_name, pi_kernel *kernel)
 
pi_result cuda_piKernelSetArg (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piextKernelSetArgMemObj (pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
 
pi_result cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
 
pi_result cuda_piKernelGetGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEnqueueKernelLaunch (pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueNativeKernel (pi_queue, void(*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *)
 TODO: Not implemented. More...
 
pi_result cuda_piextKernelCreateWithNativeHandle (pi_native_handle, pi_context, pi_program, bool, pi_kernel *)
 
pi_result cuda_piMemImageCreate (pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
 TODO: Not implemented. More...
 
pi_result cuda_piMemImageGetInfo (pi_mem, pi_image_info, size_t, void *, size_t *)
 TODO: Not implemented. More...
 
pi_result cuda_piMemRetain (pi_mem mem)
 
pi_result cuda_piclProgramCreateWithSource (pi_context, pi_uint32, const char **, const size_t *, pi_program *)
 Not used as CUDA backend only creates programs from binary. More...
 
pi_result cuda_piProgramBuild (pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
 Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels). More...
 
pi_result cuda_piProgramCreate (pi_context, const void *, size_t, pi_program *)
 TODO: Not implemented. More...
 
pi_result cuda_piProgramCreateWithBinary (pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *program)
 Loads images from a list of PTX or CUBIN binaries. More...
 
pi_result cuda_piProgramGetInfo (pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramLink (pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
 Creates a new PI program object that is the outcome of linking all input programs. More...
 
pi_result cuda_piProgramCompile (pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
 Creates a new program that is the outcome of the compilation of the headers and the program. More...
 
pi_result cuda_piProgramGetBuildInfo (pi_program program, pi_device device, pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramRetain (pi_program program)
 
pi_result cuda_piProgramRelease (pi_program program)
 Decreases the reference count of a pi_program object. More...
 
pi_result cuda_piextProgramGetNativeHandle (pi_program program, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI program object. More...
 
pi_result cuda_piextProgramCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_program *)
 Creates a PI program object from a CUDA program handle. More...
 
pi_result cuda_piKernelGetInfo (pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelGetSubGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelRetain (pi_kernel kernel)
 
pi_result cuda_piKernelRelease (pi_kernel kernel)
 
pi_result cuda_piKernelSetExecInfo (pi_kernel, pi_kernel_exec_info, size_t, const void *)
 
pi_result cuda_piextProgramSetSpecializationConstant (pi_program, pi_uint32, size_t, const void *)
 
pi_result cuda_piextKernelSetArgPointer (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piEventCreate (pi_context, pi_event *)
 
pi_result cuda_piEventGetInfo (pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEventGetProfilingInfo (pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Obtains profiling information from PI CUDA events. TODO: Timings from CUDA are only elapsed time. More...
 
pi_result cuda_piEventSetCallback (pi_event, pi_int32, pfn_notify, void *)
 
pi_result cuda_piEventSetStatus (pi_event, pi_int32)
 
pi_result cuda_piextEventGetNativeHandle (pi_event event, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI event object. More...
 
pi_result cuda_piextEventCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *event)
 Creates a PI event object from a CUDA event handle. More...
 
pi_result cuda_piSamplerCreate (pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
 Creates a PI sampler object. More...
 
pi_result cuda_piSamplerGetInfo (pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Gets information from a PI sampler object. More...
 
pi_result cuda_piSamplerRetain (pi_sampler sampler)
 Retains a PI sampler object, incrementing its reference count. More...
 
pi_result cuda_piSamplerRelease (pi_sampler sampler)
 Releases a PI sampler object, decrementing its reference count. More...
 
static pi_result commonEnqueueMemBufferCopyRect (CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr, const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch)
 General 3D memory copy operation. More...
 
pi_result cuda_piEnqueueMemBufferReadRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferWriteRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopy (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopyRect (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferFill (pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
static size_t imageElementByteSize (CUDA_ARRAY_DESCRIPTOR array_desc)
 
static pi_result commonEnqueueMemImageNDCopy (CUstream cu_stream, pi_mem_type img_type, const size_t *region, const void *src_ptr, const CUmemorytype_enum src_type, const size_t *src_offset, void *dst_ptr, const CUmemorytype_enum dst_type, const size_t *dst_offset)
 General ND memory copy operation for images (where N > 1). More...
 
pi_result cuda_piEnqueueMemImageRead (pi_queue command_queue, pi_mem image, pi_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageWrite (pi_queue command_queue, pi_mem image, pi_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageCopy (pi_queue command_queue, pi_mem src_image, pi_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageFill (pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *)
 TODO: Not implemented in CUDA. More...
 
pi_result cuda_piEnqueueMemBufferMap (pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
 Implements mapping on the host using a BufferRead operation. More...
 
pi_result cuda_piEnqueueMemUnmap (pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Implements the unmap from the host, using a BufferWrite operation. More...
 
pi_result cuda_piextUSMHostAlloc (void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM Host allocations using CUDA Pinned Memory. More...
 
pi_result cuda_piextUSMDeviceAlloc (void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM device allocations using a normal CUDA device pointer. More...
 
pi_result cuda_piextUSMSharedAlloc (void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM Shared allocations using CUDA Managed Memory. More...
 
pi_result cuda_piextUSMFree (pi_context context, void *ptr)
 USM: Frees the given USM pointer associated with the context. More...
 
pi_result cuda_piextUSMEnqueueMemset (pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemcpy (pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueuePrefetch (pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemAdvise (pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
 USM: memadvise API to govern behavior of automatic migration mechanisms. More...
 
pi_result cuda_piextUSMGetMemAllocInfo (pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value; PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer falls inside an allocation. More...
 
pi_result cuda_piTearDown (void *)
 
pi_result piPluginInit (pi_plugin *PluginInit)
 

Variables

const char SupportedVersion [] = _PI_CUDA_PLUGIN_VERSION_STRING
 

Detailed Description

Implementation of CUDA Plugin.

Definition in file pi_cuda.cpp.

Macro Definition Documentation

◆ _PI_CL

#define _PI_CL (   pi_api,
  cuda_api 
)     (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);

Function Documentation

◆ commonEnqueueMemBufferCopyRect()

static pi_result commonEnqueueMemBufferCopyRect ( CUstream  cu_stream,
pi_buff_rect_region  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
pi_buff_rect_offset  src_offset,
size_t  src_row_pitch,
size_t  src_slice_pitch,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
pi_buff_rect_offset  dst_offset,
size_t  dst_row_pitch,
size_t  dst_slice_pitch 
)
static

General 3D memory copy operation.

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is on the device, src_ptr and/or dst_ptr must be a pointer to a CUdeviceptr.

Definition at line 4185 of file pi_cuda.cpp.

References pi_buff_rect_region_struct::depth_scalar, pi_buff_rect_region_struct::height_scalar, pi_buff_rect_region_struct::width_bytes, pi_buff_rect_offset_struct::x_bytes, pi_buff_rect_offset_struct::y_scalar, and pi_buff_rect_offset_struct::z_scalar.

Referenced by cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferReadRect(), and cuda_piEnqueueMemBufferWriteRect().

◆ commonEnqueueMemImageNDCopy()

static pi_result commonEnqueueMemImageNDCopy ( CUstream  cu_stream,
pi_mem_type  img_type,
const size_t *  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
const size_t *  src_offset,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
const size_t *  dst_offset 
)
static

General ND memory copy operation for images (where N > 1).

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is an array, src_ptr and/or dst_ptr must be a pointer to a CUarray.

Definition at line 4557 of file pi_cuda.cpp.

References PI_MEM_TYPE_IMAGE2D, and PI_MEM_TYPE_IMAGE3D.

Referenced by cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), and cuda_piEnqueueMemImageWrite().

◆ cuda_piclProgramCreateWithSource()

pi_result cuda_piclProgramCreateWithSource ( pi_context  ,
pi_uint32  ,
const char **  ,
const size_t *  ,
pi_program *  
)

Not used as CUDA backend only creates programs from binary.

See cuda_piProgramCreateWithBinary.

Definition at line 3338 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::cuPrint().

Referenced by piPluginInit().

◆ cuda_piContextCreate()

pi_result cuda_piContextCreate ( const pi_context_properties *  properties,
pi_uint32  num_devices,
const pi_device *  devices,
void(*)(const char *errinfo, const void *private_info, size_t cb, void *user_data)  pfn_notify,
void *  user_data,
pi_context *  retcontext 
)

Create a PI CUDA context.

By default creates a scoped context and keeps the last active CUDA context on top of the CUDA context stack. With the __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE creates a primary CUDA context and activates it on the CUDA context stack.

Parameters
[in] properties: 0-terminated array of key/id-value combinations. Can be nullptr. Only accepts property key/id __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY with a pi_bool value.
[in] num_devices: Number of devices to create the context for.
[in] devices: Devices to create the context for.
[in] pfn_notify: Callback, currently unused.
[in] user_data: User data for callback.
[out] retcontext: Set to created context on success.
Returns
PI_SUCCESS on success, otherwise an error return code.

Definition at line 2060 of file pi_cuda.cpp.

References __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, sycl::_V1::detail::pi::die(), _pi_platform::evBase_, std::get(), PI_FALSE, PI_TRUE, _pi_context::primary, and _pi_context::user_defined.

Referenced by piPluginInit().

◆ cuda_piContextGetInfo()

◆ cuda_piContextRelease()

◆ cuda_piContextRetain()

◆ cuda_piDeviceGetInfo()

pi_result cuda_piDeviceGetInfo ( pi_device  device,
pi_device_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Definition at line 1135 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::assertion(), sycl::_V1::detail::pi::cuPrint(), sycl::_V1::detail::pi::die(), min(), PI_DEVICE_EXEC_CAPABILITIES_KERNEL, PI_DEVICE_INFO_ADDRESS_BITS, PI_DEVICE_INFO_ATOMIC_64, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, PI_DEVICE_INFO_AVAILABLE, PI_DEVICE_INFO_BACKEND_VERSION, PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, PI_DEVICE_INFO_BUILT_IN_KERNELS, PI_DEVICE_INFO_COMPILER_AVAILABLE, PI_DEVICE_INFO_DEVICE_ID, PI_DEVICE_INFO_DOUBLE_FP_CONFIG, PI_DEVICE_INFO_DRIVER_VERSION, PI_DEVICE_INFO_ENDIAN_LITTLE, PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, PI_DEVICE_INFO_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT, PI_DEVICE_INFO_EXTENSIONS, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_SIZE, PI_DEVICE_INFO_GPU_EU_COUNT, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, PI_DEVICE_INFO_GPU_SLICES, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, PI_DEVICE_INFO_HALF_FP_CONFIG, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_SUPPORT, PI_DEVICE_INFO_LINKER_AVAILABLE, PI_DEVICE_INFO_LOCAL_MEM_SIZE, PI_DEVICE_INFO_LOCAL_MEM_TYPE, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_MAX_COMPUTE_UNITS, PI_DEVICE_INFO_MAX_CONSTANT_ARGS, PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, PI_DEVICE_INFO_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_SAMPLERS, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, 
PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, PI_DEVICE_INFO_NAME, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_OPENCL_C_VERSION, PI_DEVICE_INFO_PARENT_DEVICE, PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, PI_DEVICE_INFO_PARTITION_PROPERTIES, PI_DEVICE_INFO_PARTITION_TYPE, PI_DEVICE_INFO_PCI_ADDRESS, PI_DEVICE_INFO_PLATFORM, PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, PI_DEVICE_INFO_PROFILE, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES, PI_DEVICE_INFO_REFERENCE_COUNT, PI_DEVICE_INFO_SINGLE_FP_CONFIG, PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PI_DEVICE_INFO_TYPE, PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, PI_DEVICE_INFO_USM_DEVICE_SUPPORT, PI_DEVICE_INFO_USM_HOST_SUPPORT, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, PI_DEVICE_INFO_UUID, PI_DEVICE_INFO_VENDOR, PI_DEVICE_INFO_VENDOR_ID, PI_DEVICE_INFO_VERSION, PI_DEVICE_LOCAL_MEM_TYPE_LOCAL, PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE, PI_DEVICE_TYPE_GPU, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, 
PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, PI_FALSE, PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, PI_FP_DENORM, PI_FP_FMA, PI_FP_INF_NAN, PI_FP_ROUND_TO_INF, PI_FP_ROUND_TO_NEAREST, PI_FP_ROUND_TO_ZERO, PI_MEMORY_ORDER_ACQ_REL, PI_MEMORY_ORDER_ACQUIRE, PI_MEMORY_ORDER_RELAXED, PI_MEMORY_ORDER_RELEASE, PI_MEMORY_SCOPE_DEVICE, PI_MEMORY_SCOPE_SUB_GROUP, PI_MEMORY_SCOPE_SYSTEM, PI_MEMORY_SCOPE_WORK_GROUP, PI_MEMORY_SCOPE_WORK_ITEM, PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, PI_QUEUE_PROFILING_ENABLE, PI_TRUE, PI_USM_ACCESS, PI_USM_ATOMIC_ACCESS, PI_USM_CONCURRENT_ACCESS, and PI_USM_CONCURRENT_ATOMIC_ACCESS.

Referenced by cuda_piPlatformsGet(), and piPluginInit().

◆ cuda_piDevicePartition()

pi_result cuda_piDevicePartition ( pi_device  ,
const pi_device_partition_property *  ,
pi_uint32  ,
pi_device *  ,
pi_uint32 *  
)

Not applicable to CUDA, devices cannot be partitioned.

Definition at line 1072 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piDeviceRelease()

pi_result cuda_piDeviceRelease ( pi_device  )
Returns
PI_SUCCESS always since CUDA devices are always root devices.

Definition at line 1133 of file pi_cuda.cpp.

Referenced by piPluginInit(), _pi_context::~_pi_context(), and _pi_queue::~_pi_queue().

◆ cuda_piDeviceRetain()

pi_result cuda_piDeviceRetain ( pi_device  )
Returns
PI_SUCCESS if the function is executed successfully. CUDA devices are always root devices, so retain always returns success.

Definition at line 1013 of file pi_cuda.cpp.

Referenced by _pi_context::_pi_context(), _pi_queue::_pi_queue(), and piPluginInit().

◆ cuda_piDevicesGet()

pi_result cuda_piDevicesGet ( pi_platform  platform,
pi_device_type  device_type,
pi_uint32  num_entries,
pi_device *  devices,
pi_uint32 *  num_devices 
)
Parameters
devices: List of devices available on the system.
num_devices: Number of elements in the list of devices.
Requesting a non-GPU device triggers an error; all PI CUDA devices are GPUs.

Definition at line 981 of file pi_cuda.cpp.

References PI_DEVICE_TYPE_DEFAULT, and PI_DEVICE_TYPE_GPU.

Referenced by piPluginInit().

◆ cuda_piEnqueueEventsWait()

pi_result cuda_piEnqueueEventsWait ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
pi_event *  event 
)

Enqueues a wait on the given CUstream for all events.

See enqueueEventWait. TODO: Add support for multiple streams once the Event class is properly refactored.

Definition at line 3905 of file pi_cuda.cpp.

References cuda_piEnqueueEventsWaitWithBarrier().

Referenced by cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemUnmap(), and piPluginInit().

◆ cuda_piEnqueueEventsWaitWithBarrier()

pi_result cuda_piEnqueueEventsWaitWithBarrier ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
pi_event *  event 
)

Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue.

Parameters
[in] command_queue: A valid PI queue.
[in] num_events_in_wait_list: Number of events in event_wait_list.
[in] event_wait_list: Events to wait on.
[out] event: Event for when all events in event_wait_list have finished or, if event_wait_list is empty, when all previous events in the queue have finished.
Returns
TBD

Definition at line 3925 of file pi_cuda.cpp.

References _pi_queue::barrier_event_, _pi_queue::barrier_mutex_, _pi_queue::barrier_tmp_event_, _pi_queue::compute_applied_barrier_, _pi_queue::get_context(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_MARKER, _pi_queue::sync_streams(), and _pi_queue::transfer_applied_barrier_.

Referenced by cuda_piEnqueueEventsWait(), cuda_piEnqueueKernelLaunch(), and piPluginInit().

◆ cuda_piEnqueueKernelLaunch()

pi_result cuda_piEnqueueKernelLaunch ( pi_queue  command_queue,
pi_kernel  kernel,
pi_uint32  work_dim,
const size_t *  global_work_offset,
const size_t *  global_work_size,
const size_t *  local_work_size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopy()

pi_result cuda_piEnqueueMemBufferCopy ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
size_t  src_offset,
size_t  dst_offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopyRect()

pi_result cuda_piEnqueueMemBufferCopyRect ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
pi_buff_rect_offset  src_origin,
pi_buff_rect_offset  dst_origin,
pi_buff_rect_region  region,
size_t  src_row_pitch,
size_t  src_slice_pitch,
size_t  dst_row_pitch,
size_t  dst_slice_pitch,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferFill()

pi_result cuda_piEnqueueMemBufferFill ( pi_queue  command_queue,
pi_mem  buffer,
const void *  pattern,
size_t  pattern_size,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferMap()

pi_result cuda_piEnqueueMemBufferMap ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_map,
pi_map_flags  map_flags,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event,
void **  ret_map 
)

Implements mapping on the host using a BufferRead operation.

Mapped pointers are stored in the pi_mem object. If the buffer uses pinned host memory a pointer to that memory is returned and no read operation is done.

Definition at line 4837 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::allocMode_, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_, cuda_piEnqueueEventsWait(), cuda_piEnqueueMemBufferRead(), _pi_queue::get_context(), _pi_mem::mem_::buffer_mem_::get_map_ptr(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_::buffer_mem_::map_to_ptr(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_MEM_BUFFER_MAP, PI_MAP_READ, and PI_MAP_WRITE.

Referenced by piPluginInit().

◆ cuda_piEnqueueMemBufferRead()

pi_result cuda_piEnqueueMemBufferRead ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
size_t  offset,
size_t  size,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferReadRect()

pi_result cuda_piEnqueueMemBufferReadRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWrite()

pi_result cuda_piEnqueueMemBufferWrite ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
size_t  offset,
size_t  size,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWriteRect()

pi_result cuda_piEnqueueMemBufferWriteRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageCopy()

pi_result cuda_piEnqueueMemImageCopy ( pi_queue  command_queue,
pi_mem  src_image,
pi_mem  dst_image,
const size_t *  src_origin,
const size_t *  dst_origin,
const size_t *  region,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageFill()

pi_result cuda_piEnqueueMemImageFill ( pi_queue  ,
pi_mem  ,
const void *  ,
const size_t *  ,
const size_t *  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented in CUDA.

Definition at line 4825 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEnqueueMemImageRead()

pi_result cuda_piEnqueueMemImageRead ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_read,
const size_t *  origin,
const size_t *  region,
size_t  row_pitch,
size_t  slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageWrite()

pi_result cuda_piEnqueueMemImageWrite ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_write,
const size_t *  origin,
const size_t *  region,
size_t  input_row_pitch,
size_t  input_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemUnmap()

pi_result cuda_piEnqueueMemUnmap ( pi_queue  command_queue,
pi_mem  memobj,
void *  mapped_ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueNativeKernel()

pi_result cuda_piEnqueueNativeKernel ( pi_queue  ,
void(*)(void *)  ,
void *  ,
size_t  ,
pi_uint32  ,
const pi_mem ,
const void **  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented.

Definition at line 3151 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventCreate()

pi_result cuda_piEventCreate ( pi_context  ,
pi_event  
)

Definition at line 3787 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventGetInfo()

pi_result cuda_piEventGetInfo ( pi_event  event,
pi_event_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piEventGetProfilingInfo()

pi_result cuda_piEventGetProfilingInfo ( pi_event  event,
pi_profiling_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piEventRelease()

pi_result cuda_piEventRelease ( pi_event  event)

Definition at line 3875 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion().

Referenced by piPluginInit().

◆ cuda_piEventRetain()

pi_result cuda_piEventRetain ( pi_event  event)

Definition at line 3863 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion().

Referenced by piPluginInit().

◆ cuda_piEventSetCallback()

pi_result cuda_piEventSetCallback ( pi_event  ,
pi_int32  ,
pfn_notify  ,
void *   
)

Definition at line 3853 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventSetStatus()

pi_result cuda_piEventSetStatus ( pi_event  ,
pi_int32   
)

Definition at line 3858 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventsWait()

pi_result cuda_piEventsWait ( pi_uint32  num_events,
const pi_event event_list 
)

Definition at line 2778 of file pi_cuda.cpp.

References _pi_event::get_context().

Referenced by piPluginInit().

◆ cuda_piextContextCreateWithNativeHandle()

pi_result cuda_piextContextCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_uint32  num_devices,
const pi_device devices,
bool  ownNativeHandle,
pi_context piContext 
)

Creates a PI context object from a CUDA context handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI context object from.
[out] piContext: Set to the PI context object created from the native handle.
Returns
TBD

Definition at line 2204 of file pi_cuda.cpp.

References cuda_piextDeviceCreateWithNativeHandle(), and _pi_context::user_defined.

Referenced by piPluginInit().

◆ cuda_piextContextGetNativeHandle()

pi_result cuda_piextContextGetNativeHandle ( pi_context  context,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI context object.

Parameters
[in] context: The PI context to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI context object.
Returns
PI_SUCCESS

Definition at line 2191 of file pi_cuda.cpp.

References _pi_context::get().

Referenced by piPluginInit().

◆ cuda_piextContextSetExtendedDeleter()

pi_result cuda_piextContextSetExtendedDeleter ( pi_context  context,
pi_context_extended_deleter  function,
void *  user_data 
)

Definition at line 1065 of file pi_cuda.cpp.

References _pi_context::set_extended_deleter().

Referenced by piPluginInit().

◆ cuda_piextDeviceCreateWithNativeHandle()

pi_result cuda_piextDeviceCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_platform  platform,
pi_device piDevice 
)

Creates a PI device object from a CUDA device handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI device object from.
[in] platform: The PI platform of the device.
[out] piDevice: Set to the PI device object created from the native handle.
Returns
TBD

Definition at line 1992 of file pi_cuda.cpp.

References cuda_piPlatformsGet(), _pi_device::get(), and sycl::_V1::malloc().

Referenced by cuda_piextContextCreateWithNativeHandle(), and piPluginInit().

◆ cuda_piextDeviceGetNativeHandle()

pi_result cuda_piextDeviceGetNativeHandle ( pi_device  device,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI device object.

Parameters
[in] device: The PI device to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI device object.
Returns
PI_SUCCESS

Definition at line 1978 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextDeviceSelectBinary()

pi_result cuda_piextDeviceSelectBinary ( pi_device  device,
pi_device_binary binaries,
pi_uint32  num_binaries,
pi_uint32 selected_binary 
)
Returns
If available, the first binary that is PTX

Definition at line 1080 of file pi_cuda.cpp.

References __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64, and sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextEventCreateWithNativeHandle()

pi_result cuda_piextEventCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_context  context,
bool  ownNativeHandle,
pi_event event 
)

Creates a PI event object from a CUDA event handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI event object from.
[out] event: Set to the PI event object created from the native handle.
Returns
TBD

Definition at line 4027 of file pi_cuda.cpp.

References _pi_event::make_with_native().

Referenced by piPluginInit().

◆ cuda_piextEventGetNativeHandle()

pi_result cuda_piextEventGetNativeHandle ( pi_event  event,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI event object.

Parameters
[in] event: The PI event to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI event object.
Returns
PI_SUCCESS on success. PI_ERROR_INVALID_EVENT if given a user event.

Definition at line 4013 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextGetDeviceFunctionPointer()

pi_result cuda_piextGetDeviceFunctionPointer ( pi_device  device,
pi_program  program,
const char *  func_name,
pi_uint64 func_pointer_ret 
)

Definition at line 1108 of file pi_cuda.cpp.

References _pi_program::get(), _pi_program::get_context(), and _pi_context::get_device().

Referenced by piPluginInit().

◆ cuda_piextKernelCreateWithNativeHandle()

pi_result cuda_piextKernelCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
pi_program  ,
bool  ,
pi_kernel  
)

Definition at line 3158 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgMemObj()

◆ cuda_piextKernelSetArgPointer()

pi_result cuda_piextKernelSetArgPointer ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 3777 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgSampler()

pi_result cuda_piextKernelSetArgSampler ( pi_kernel  kernel,
pi_uint32  arg_index,
const pi_sampler arg_value 
)

Definition at line 2904 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextMemCreateWithNativeHandle()

pi_result cuda_piextMemCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_context  context,
bool  ownNativeHandle,
pi_mem mem 
)

Creates a PI mem object from a CUDA mem handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI mem object from.
[in] context: The PI context of the memory allocation.
[in] ownNativeHandle: Indicates whether we own the native memory handle or whether it came from interop that asked not to transfer ownership to the SYCL RT.
[out] mem: Set to the PI mem object created from the native handle.
Returns
TBD

Definition at line 2469 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextMemGetNativeHandle()

pi_result cuda_piextMemGetNativeHandle ( pi_mem  mem,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI mem object.

Parameters
[in] mem: The PI mem to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI mem object.
Returns
PI_SUCCESS

Definition at line 2452 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), and _pi_mem::mem_.

Referenced by piPluginInit().

◆ cuda_piextProgramCreateWithNativeHandle()

pi_result cuda_piextProgramCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
pi_program  
)

Creates a PI program object from a CUDA program handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI program object from.
[in] context: The PI context of the program.
[out] program: Set to the PI program object created from the native handle.
Returns
TBD

Definition at line 3641 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextProgramGetNativeHandle()

pi_result cuda_piextProgramGetNativeHandle ( pi_program  program,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI program object.

Parameters
[in] program: The PI program to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI program object.
Returns
TBD

Definition at line 3626 of file pi_cuda.cpp.

References _pi_program::get().

Referenced by piPluginInit().

◆ cuda_piextProgramSetSpecializationConstant()

pi_result cuda_piextProgramSetSpecializationConstant ( pi_program  ,
pi_uint32  ,
size_t  ,
const void *   
)

Definition at line 3769 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextQueueCreateWithNativeHandle()

pi_result cuda_piextQueueCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_context  context,
pi_device  device,
bool  ownNativeHandle,
pi_queue queue 
)

Creates a PI queue object from a CUDA queue handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI queue object from.
[in] context: The PI context of the queue.
[out] queue: Set to the PI queue object created from the native handle.
ownNativeHandle: Tells whether the SYCL RT should assume ownership of the native handle, if it can.
Returns
TBD

Definition at line 2646 of file pi_cuda.cpp.

References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, sycl::_V1::detail::pi::die(), and _pi_context::get_device().

Referenced by piPluginInit().

◆ cuda_piextQueueGetNativeHandle()

pi_result cuda_piextQueueGetNativeHandle ( pi_queue  queue,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI queue object.

Parameters
[in] queue: The PI queue to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI queue object.
Returns
PI_SUCCESS

Definition at line 2628 of file pi_cuda.cpp.

References _pi_queue::get_context(), and _pi_queue::get_next_compute_stream().

Referenced by piPluginInit().

◆ cuda_piextUSMDeviceAlloc()

pi_result cuda_piextUSMDeviceAlloc ( void **  result_ptr,
pi_context  context,
pi_device  device,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM device allocations using a normal CUDA device pointer.

Definition at line 4974 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueMemAdvise()

◆ cuda_piextUSMEnqueueMemcpy()

pi_result cuda_piextUSMEnqueueMemcpy ( pi_queue  queue,
pi_bool  blocking,
void *  dst_ptr,
const void *  src_ptr,
size_t  size,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMEnqueueMemset()

pi_result cuda_piextUSMEnqueueMemset ( pi_queue  queue,
void *  ptr,
pi_int32  value,
size_t  count,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMEnqueuePrefetch()

pi_result cuda_piextUSMEnqueuePrefetch ( pi_queue  queue,
const void *  ptr,
size_t  size,
pi_usm_migration_flags  flags,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMFree()

pi_result cuda_piextUSMFree ( pi_context  context,
void *  ptr 
)

USM: Frees the given USM pointer associated with the context.

Definition at line 5023 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextUSMGetMemAllocInfo()

pi_result cuda_piextUSMGetMemAllocInfo ( pi_context  context,
const void *  ptr,
pi_mem_alloc_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns a host/device/shared pi_host_usm value. PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer falls inside an allocation; the result must fit in a void *.

PI_MEM_ALLOC_SIZE returns the size in bytes of the queried pointer's allocation; the result is a size_t. PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against.

Parameters
context: The pi_context.
ptr: The pointer to query.
param_name: The type of query to perform.
param_value_size: The size of the result in bytes.
param_value: The result.
param_value_size_ret: How many bytes were written.

Definition at line 5301 of file pi_cuda.cpp.

References cuda_piPlatformsGet(), PI_MEM_ALLOC_BASE_PTR, PI_MEM_ALLOC_DEVICE, PI_MEM_ALLOC_SIZE, PI_MEM_ALLOC_TYPE, PI_MEM_TYPE_DEVICE, PI_MEM_TYPE_HOST, PI_MEM_TYPE_SHARED, and PI_MEM_TYPE_UNKNOWN.

Referenced by piPluginInit().

◆ cuda_piextUSMHostAlloc()

pi_result cuda_piextUSMHostAlloc ( void **  result_ptr,
pi_context  context,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM Host allocations using CUDA Pinned Memory.

Definition at line 4952 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextUSMSharedAlloc()

pi_result cuda_piextUSMSharedAlloc ( void **  result_ptr,
pi_context  context,
pi_device  device,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM Shared allocations using CUDA Managed Memory.

Definition at line 4998 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelCreate()

pi_result cuda_piKernelCreate ( pi_program  program,
const char *  kernel_name,
pi_kernel kernel 
)

Definition at line 2813 of file pi_cuda.cpp.

References _pi_program::get(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piKernelGetGroupInfo()

◆ cuda_piKernelGetInfo()

pi_result cuda_piKernelGetInfo ( pi_kernel  kernel,
pi_kernel_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelGetSubGroupInfo()

pi_result cuda_piKernelGetSubGroupInfo ( pi_kernel  kernel,
pi_device  device,
pi_kernel_sub_group_info  param_name,
size_t  input_value_size,
const void *  input_value,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelRelease()

pi_result cuda_piKernelRelease ( pi_kernel  kernel)

Definition at line 3745 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelRetain()

pi_result cuda_piKernelRetain ( pi_kernel  kernel)

Definition at line 3737 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelSetArg()

pi_result cuda_piKernelSetArg ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 2853 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelSetExecInfo()

pi_result cuda_piKernelSetExecInfo ( pi_kernel  ,
pi_kernel_exec_info  ,
size_t  ,
const void *   
)

Definition at line 3764 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piMemBufferCreate()

pi_result cuda_piMemBufferCreate ( pi_context  context,
pi_mem_flags  flags,
size_t  size,
void *  host_ptr,
pi_mem ret_mem,
const pi_mem_properties properties 
)

Creates a PI Memory object using a CUDA memory allocation.

Can trigger a manual copy depending on the mode. TODO: Implement USE_HOST_PTR using cuHostRegister.

Definition at line 2238 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, PI_MEM_FLAGS_HOST_PTR_ALLOC, PI_MEM_FLAGS_HOST_PTR_COPY, PI_MEM_FLAGS_HOST_PTR_USE, and _pi_mem::mem_::buffer_mem_::use_host_ptr.

Referenced by piPluginInit().

◆ cuda_piMemBufferPartition()

pi_result cuda_piMemBufferPartition ( pi_mem  parent_buffer,
pi_mem_flags  flags,
pi_buffer_create_type  buffer_create_type,
void *  buffer_create_info,
pi_mem memObj 
)

Implements a buffer partition in the CUDA backend.

A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented as an offset over an existing CUDA allocation.

Definition at line 2378 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::context_, ReleaseGuard< T >::dismiss(), _pi_mem::mem_::buffer_mem_::get_size(), _pi_mem::mem_::buffer_mem_::hostPtr_, _pi_mem::is_buffer(), _pi_mem::is_sub_buffer(), _pi_mem::mem_, PI_BUFFER_CREATE_TYPE_REGION, PI_MEM_FLAGS_ACCESS_RW, and _pi_mem::mem_::buffer_mem_::ptr_.

Referenced by piPluginInit().

◆ cuda_piMemGetInfo()

pi_result cuda_piMemGetInfo ( pi_mem  ,
pi_mem_info  ,
size_t  ,
void *  ,
size_t *   
)

Definition at line 2442 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemImageCreate()

◆ cuda_piMemImageGetInfo()

pi_result cuda_piMemImageGetInfo ( pi_mem  ,
pi_image_info  ,
size_t  ,
void *  ,
size_t *   
)

TODO: Not implemented.

Definition at line 3322 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemRelease()

pi_result cuda_piMemRelease ( pi_mem  memObj)

Decreases the reference count of the Mem object.

If the reference count reaches zero, the relevant CUDA free function is called.

Returns
PI_SUCCESS unless deallocation error

Definition at line 2314 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, _pi_mem::decrement_reference_count(), sycl::_V1::detail::pi::die(), _pi_mem::is_sub_buffer(), _pi_mem::mem_type_, _pi_mem::surface, and _pi_mem::mem_::buffer_mem_::use_host_ptr.

Referenced by piPluginInit(), and _pi_mem::~_pi_mem().

◆ cuda_piMemRetain()

pi_result cuda_piMemRetain ( pi_mem  mem)

◆ cuda_piPlatformGetInfo()

pi_result cuda_piPlatformGetInfo ( pi_platform  platform,
pi_platform_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piPlatformsGet()

pi_result cuda_piPlatformsGet ( pi_uint32  num_entries,
pi_platform platforms,
pi_uint32 num_platforms 
)

Obtains the CUDA platform.

There is only one CUDA platform, and it contains all devices on the system. Triggers the CUDA Driver initialization (cuInit) the first time, so this must be the first PI API called.

However, because multiple devices in a context are not currently supported, each device is placed in a separate platform.

Definition at line 848 of file pi_cuda.cpp.

References cuda_piDeviceGetInfo(), PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, and PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES.

Referenced by cuda_piextDeviceCreateWithNativeHandle(), cuda_piextUSMGetMemAllocInfo(), and piPluginInit().

◆ cuda_piProgramBuild()

pi_result cuda_piProgramBuild ( pi_program  program,
pi_uint32  num_devices,
const pi_device device_list,
const char *  options,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data 
)

Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels).

See _pi_program for implementation details.

Definition at line 3348 of file pi_cuda.cpp.

References _pi_program::build_program(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piProgramCompile()

pi_result cuda_piProgramCompile ( pi_program  program,
pi_uint32  num_devices,
const pi_device device_list,
const char *  options,
pi_uint32  num_input_headers,
const pi_program input_headers,
const char **  header_include_names,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data 
)

Creates a new program that is the outcome of the compilation of the headers and the program.

TODO: Implement asynchronous compilation.

Definition at line 3526 of file pi_cuda.cpp.

References _pi_program::build_program(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piProgramCreate()

pi_result cuda_piProgramCreate ( pi_context  ,
const void *  ,
size_t  ,
pi_program  
)

TODO: Not implemented.

Definition at line 3373 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piProgramCreateWithBinary()

pi_result cuda_piProgramCreateWithBinary ( pi_context  context,
pi_uint32  num_devices,
const pi_device device_list,
const size_t *  lengths,
const unsigned char **  binaries,
size_t  num_metadata_entries,
const pi_device_binary_property metadata,
pi_int32 binary_status,
pi_program program 
)

Loads images from a list of PTX or CUBIN binaries.

Note: No calls to CUDA driver API in this function, only store binaries for later.

Note: Only supports one device

Definition at line 3384 of file pi_cuda.cpp.

References _pi_device::get(), _pi_context::get_device(), and sycl::_V1::length().

Referenced by piPluginInit().

◆ cuda_piProgramGetBuildInfo()

pi_result cuda_piProgramGetBuildInfo ( pi_program  program,
pi_device  device,
pi_program_build_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piProgramGetInfo()

◆ cuda_piProgramLink()

pi_result cuda_piProgramLink ( pi_context  context,
pi_uint32  num_devices,
const pi_device device_list,
const char *  options,
pi_uint32  num_input_programs,
const pi_program input_programs,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data,
pi_program ret_program 
)

Creates a new PI program object that is the outcome of linking all input programs.

TODO: Implement linker options; requires mapping of OpenCL options to CUDA.

Definition at line 3462 of file pi_cuda.cpp.

References _pi_program::binary_, and _pi_program::binarySizeInBytes_.

Referenced by piPluginInit().

◆ cuda_piProgramRelease()

pi_result cuda_piProgramRelease ( pi_program  program)

Decreases the reference count of a pi_program object.

When the reference count reaches 0, it unloads the module from the context.

Definition at line 3591 of file pi_cuda.cpp.

References _pi_program::decrement_reference_count(), _pi_program::get(), _pi_program::get_context(), and _pi_program::get_reference_count().

Referenced by piPluginInit(), and _pi_kernel::~_pi_kernel().

◆ cuda_piProgramRetain()

pi_result cuda_piProgramRetain ( pi_program  program)

◆ cuda_piQueueCreate()

pi_result cuda_piQueueCreate ( pi_context  context,
pi_device  device,
pi_queue_properties  properties,
pi_queue queue 
)

Creates a pi_queue object on the CUDA backend.

Valid properties

  • __SYCL_PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT
  • __SYCL_PI_CUDA_SYNC_WITH_DEFAULT -> CU_STREAM_NON_BLOCKING

Returns
PI queue object mapping to a CUstream

Definition at line 2484 of file pi_cuda.cpp.

References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, _pi_queue::default_num_compute_streams, _pi_queue::default_num_transfer_streams, _pi_context::get_device(), and PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.

Referenced by piPluginInit().

◆ cuda_piQueueFinish()

pi_result cuda_piQueueFinish ( pi_queue  command_queue)

Definition at line 2589 of file pi_cuda.cpp.

References _pi_queue::get_context(), and _pi_queue::sync_streams().

Referenced by piPluginInit().

◆ cuda_piQueueFlush()

pi_result cuda_piQueueFlush ( pi_queue  command_queue)

Definition at line 2617 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piQueueGetInfo()

pi_result cuda_piQueueGetInfo ( pi_queue  command_queue,
pi_queue_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piQueueRelease()

◆ cuda_piQueueRetain()

pi_result cuda_piQueueRetain ( pi_queue  command_queue)

Definition at line 2553 of file pi_cuda.cpp.

References _pi_queue::get_reference_count(), and _pi_queue::increment_reference_count().

Referenced by piPluginInit().

◆ cuda_piSamplerCreate()

pi_result cuda_piSamplerCreate ( pi_context  context,
const pi_sampler_properties sampler_properties,
pi_sampler result_sampler 
)

Creates a PI sampler object.

Parameters
[in] context: The context the sampler is created for.
[in] sampler_properties: The properties for the sampler.
[out] result_sampler: Set to the resulting sampler object.
Returns
PI_SUCCESS on success. PI_ERROR_INVALID_VALUE if given an invalid property or if there are multiple properties from the same category.

Definition at line 4051 of file pi_cuda.cpp.

References PI_SAMPLER_ADDRESSING_MODE_CLAMP, PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, PI_SAMPLER_PROPERTIES_FILTER_MODE, PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, and PI_TRUE.

Referenced by piPluginInit().

◆ cuda_piSamplerGetInfo()

pi_result cuda_piSamplerGetInfo ( pi_sampler  sampler,
pi_sampler_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Gets information from a PI sampler object.

Parameters
[in] sampler: The sampler to get the information from.
[in] param_name: The name of the information to get.
[in] param_value_size: The size of the param_value.
[out] param_value: Set to the information value.
[out] param_value_size_ret: Set to the size of the information value.
Returns
PI_SUCCESS on success.

Definition at line 4110 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_sampler::context_, _pi_sampler::get_reference_count(), PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_INFO_ADDRESSING_MODE, PI_SAMPLER_INFO_CONTEXT, PI_SAMPLER_INFO_FILTER_MODE, PI_SAMPLER_INFO_NORMALIZED_COORDS, PI_SAMPLER_INFO_REFERENCE_COUNT, and _pi_sampler::props_.

Referenced by piPluginInit().

◆ cuda_piSamplerRelease()

pi_result cuda_piSamplerRelease ( pi_sampler  sampler)

Releases a PI sampler object, decrementing its reference count.

If the reference count reaches zero, the sampler object is destroyed.

Parameters
[in] sampler: The sampler to decrement the reference count of.
Returns
PI_SUCCESS.

Definition at line 4163 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion(), _pi_sampler::decrement_reference_count(), and _pi_sampler::get_reference_count().

Referenced by piPluginInit().

◆ cuda_piSamplerRetain()

pi_result cuda_piSamplerRetain ( pi_sampler  sampler)

Retains a PI sampler object, incrementing its reference count.

Parameters
[in] sampler: The sampler to increment the reference count of.
Returns
PI_SUCCESS.

Definition at line 4151 of file pi_cuda.cpp.

References _pi_sampler::increment_reference_count().

Referenced by piPluginInit().

◆ cuda_piTearDown()

pi_result cuda_piTearDown ( void *  )

Definition at line 5402 of file pi_cuda.cpp.

References disableCUDATracing().

Referenced by piPluginInit().

◆ disableCUDATracing()

void disableCUDATracing ( )

Definition at line 101 of file tracing.cpp.

References CUDA_CALL_STREAM_NAME, and CUDA_DEBUG_STREAM_NAME.

Referenced by cuda_piTearDown().

◆ enableCUDATracing()

void enableCUDATracing ( )

Definition at line 70 of file tracing.cpp.

References CUDA_CALL_STREAM_NAME, CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, and GVerStr.

Referenced by piPluginInit().

◆ enqueueEventWait()

pi_result enqueueEventWait ( pi_queue  queue,
pi_event  event 
)

Definition at line 645 of file pi_cuda.cpp.

References _pi_queue::for_each_stream().

◆ get_kernel_metadata()

bool get_kernel_metadata ( std::string  metadataName,
const char *  tag,
std::string &  kernelName 
)

Definition at line 663 of file pi_cuda.cpp.

Referenced by _pi_program::set_metadata().

◆ getKernelNames()

std::string getKernelNames ( pi_program  )

Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this.

Note: This is currently only being used by the SYCL program class for the has_kernel method, so an alternative would be to move the has_kernel query to PI and use cuModuleGetFunction to check for a kernel. Note: Another alternative is to add kernel names as metadata, like with reqd_work_group_size.

Definition at line 753 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by cuda_piProgramGetInfo().

◆ imageElementByteSize()

static size_t imageElementByteSize ( CUDA_ARRAY_DESCRIPTOR  array_desc)
static

◆ piPluginInit()

pi_result piPluginInit ( pi_plugin PluginInit)

Definition at line 5409 of file pi_cuda.cpp.

Variable Documentation

◆ SupportedVersion

const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING

Definition at line 5407 of file pi_cuda.cpp.

Referenced by piPluginInit().