DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_cuda.cpp File Reference
#include <CL/sycl/detail/cuda_definitions.hpp>
#include <CL/sycl/detail/defines.hpp>
#include <CL/sycl/detail/pi.hpp>
#include <pi_cuda.hpp>
#include <algorithm>
#include <cassert>
#include <cuda.h>
#include <cuda_device_runtime_api.h>
#include <limits>
#include <memory>
#include <mutex>
#include <regex>
Include dependency graph for pi_cuda.cpp:

Go to the source code of this file.

Classes

class  ReleaseGuard< T >
 RAII object that calls the reference count release function on the held PI object on destruction. More...
 

Namespaces

 cl
 We provide new interfaces for matrix multiply in this patch:
 
 cl::sycl
 
 cl::sycl::detail
 
 cl::sycl::detail::pi
 

Macros

#define _PI_CL(pi_api, cuda_api)   (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);
 

Functions

void cl::sycl::detail::pi::die (const char *Message)
 
void cl::sycl::detail::pi::cuPrint (const char *Message)
 
void cl::sycl::detail::pi::assertion (bool Condition, const char *Message=nullptr)
 
pi_result cuda_piEnqueueEventsWait (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all events. More...
 
pi_result cuda_piEnqueueEventsWaitWithBarrier (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue. More...
 
pi_result cuda_piEventRelease (pi_event event)
 
pi_result cuda_piEventRetain (pi_event event)
 
pi_result enqueueEventWait (pi_queue queue, pi_event event)
 
bool get_kernel_metadata (std::string metadataName, const char *tag, std::string &kernelName)
 
std::string getKernelNames (pi_program)
 Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this. More...
 
pi_result cuda_piDeviceGetInfo (pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piPlatformsGet (pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
 Obtains the CUDA platform. More...
 
pi_result cuda_piPlatformGetInfo (pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piDevicesGet (pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
 
pi_result cuda_piDeviceRetain (pi_device)
 
pi_result cuda_piContextGetInfo (pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piContextRetain (pi_context context)
 
pi_result cuda_piextContextSetExtendedDeleter (pi_context context, pi_context_extended_deleter function, void *user_data)
 
pi_result cuda_piDevicePartition (pi_device, const cl_device_partition_property *, pi_uint32, pi_device *, pi_uint32 *)
 Not applicable to CUDA, devices cannot be partitioned. More...
 
pi_result cuda_piextDeviceSelectBinary (pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary)
 
pi_result cuda_piextGetDeviceFunctionPointer (pi_device device, pi_program program, const char *func_name, pi_uint64 *func_pointer_ret)
 
pi_result cuda_piDeviceRelease (pi_device)
 
pi_result cuda_piextDeviceGetNativeHandle (pi_device device, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI device object. More...
 
pi_result cuda_piextDeviceCreateWithNativeHandle (pi_native_handle, pi_platform, pi_device *)
 Creates a PI device object from a CUDA device handle. More...
 
pi_result cuda_piContextCreate (const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data), void *user_data, pi_context *retcontext)
 Create a PI CUDA context. More...
 
pi_result cuda_piContextRelease (pi_context ctxt)
 
pi_result cuda_piextContextGetNativeHandle (pi_context context, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI context object. More...
 
pi_result cuda_piextContextCreateWithNativeHandle (pi_native_handle, pi_uint32, const pi_device *, bool, pi_context *)
 Creates a PI context object from a CUDA context handle. More...
 
pi_result cuda_piMemBufferCreate (pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
 Creates a PI Memory object using a CUDA memory allocation. More...
 
pi_result cuda_piMemRelease (pi_mem memObj)
 Decreases the reference count of the Mem object. More...
 
pi_result cuda_piMemBufferPartition (pi_mem parent_buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *memObj)
 Implements a buffer partition in the CUDA backend. More...
 
pi_result cuda_piMemGetInfo (pi_mem, pi_mem_info, size_t, void *, size_t *)
 
pi_result cuda_piextMemGetNativeHandle (pi_mem mem, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI mem object. More...
 
pi_result cuda_piextMemCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *mem)
 Creates a PI mem object from a CUDA mem handle. More...
 
pi_result cuda_piQueueCreate (pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
 Creates a pi_queue object on the CUDA backend. More...
 
pi_result cuda_piQueueGetInfo (pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piQueueRetain (pi_queue command_queue)
 
pi_result cuda_piQueueRelease (pi_queue command_queue)
 
pi_result cuda_piQueueFinish (pi_queue command_queue)
 
pi_result cuda_piQueueFlush (pi_queue command_queue)
 
pi_result cuda_piextQueueGetNativeHandle (pi_queue queue, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI queue object. More...
 
pi_result cuda_piextQueueCreateWithNativeHandle (pi_native_handle, pi_context, pi_device, bool ownNativeHandle, pi_queue *)
 Creates a PI queue object from a CUDA queue handle. More...
 
pi_result cuda_piEnqueueMemBufferWrite (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferRead (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEventsWait (pi_uint32 num_events, const pi_event *event_list)
 
pi_result cuda_piKernelCreate (pi_program program, const char *kernel_name, pi_kernel *kernel)
 
pi_result cuda_piKernelSetArg (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piextKernelSetArgMemObj (pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
 
pi_result cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
 
pi_result cuda_piKernelGetGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEnqueueKernelLaunch (pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueNativeKernel (pi_queue, void(*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *)
 \TODO Not implemented More...
 
pi_result cuda_piextKernelCreateWithNativeHandle (pi_native_handle, pi_context, pi_program, bool, pi_kernel *)
 
pi_result cuda_piMemImageCreate (pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
 \TODO Not implemented More...
 
pi_result cuda_piMemImageGetInfo (pi_mem, pi_image_info, size_t, void *, size_t *)
 \TODO Not implemented More...
 
pi_result cuda_piMemRetain (pi_mem mem)
 
pi_result cuda_piclProgramCreateWithSource (pi_context, pi_uint32, const char **, const size_t *, pi_program *)
 Not used as CUDA backend only creates programs from binary. More...
 
pi_result cuda_piProgramBuild (pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
 Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels). More...
 
pi_result cuda_piProgramCreate (pi_context, const void *, size_t, pi_program *)
 \TODO Not implemented More...
 
pi_result cuda_piProgramCreateWithBinary (pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *program)
 Loads images from a list of PTX or CUBIN binaries. More...
 
pi_result cuda_piProgramGetInfo (pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramLink (pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
 Creates a new PI program object that is the outcome of linking all input programs. More...
 
pi_result cuda_piProgramCompile (pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
 Creates a new program that is the outcome of the compilation of the headers and the program. More...
 
pi_result cuda_piProgramGetBuildInfo (pi_program program, pi_device device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramRetain (pi_program program)
 
pi_result cuda_piProgramRelease (pi_program program)
 Decreases the reference count of a pi_program object. More...
 
pi_result cuda_piextProgramGetNativeHandle (pi_program program, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI program object. More...
 
pi_result cuda_piextProgramCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_program *)
 Creates a PI program object from a CUDA program handle. More...
 
pi_result cuda_piKernelGetInfo (pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelGetSubGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelRetain (pi_kernel kernel)
 
pi_result cuda_piKernelRelease (pi_kernel kernel)
 
pi_result cuda_piKernelSetExecInfo (pi_kernel, pi_kernel_exec_info, size_t, const void *)
 
pi_result cuda_piextProgramSetSpecializationConstant (pi_program, pi_uint32, size_t, const void *)
 
pi_result cuda_piextKernelSetArgPointer (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piEventCreate (pi_context, pi_event *)
 
pi_result cuda_piEventGetInfo (pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEventGetProfilingInfo (pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Obtains profiling information from PI CUDA events. \TODO Untie from OpenCL, timings from CUDA are only elapsed time. More...
 
pi_result cuda_piEventSetCallback (pi_event, pi_int32, pfn_notify, void *)
 
pi_result cuda_piEventSetStatus (pi_event, pi_int32)
 
pi_result cuda_piextEventGetNativeHandle (pi_event event, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI event object. More...
 
pi_result cuda_piextEventCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_event *)
 Creates a PI event object from a CUDA event handle. More...
 
pi_result cuda_piSamplerCreate (pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
 Creates a PI sampler object. More...
 
pi_result cuda_piSamplerGetInfo (pi_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Gets information from a PI sampler object. More...
 
pi_result cuda_piSamplerRetain (pi_sampler sampler)
 Retains a PI sampler object, incrementing its reference count. More...
 
pi_result cuda_piSamplerRelease (pi_sampler sampler)
 Releases a PI sampler object, decrementing its reference count. More...
 
static pi_result commonEnqueueMemBufferCopyRect (CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr, const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch)
 General 3D memory copy operation. More...
 
pi_result cuda_piEnqueueMemBufferReadRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferWriteRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopy (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopyRect (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferFill (pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
static size_t imageElementByteSize (CUDA_ARRAY_DESCRIPTOR array_desc)
 
static pi_result commonEnqueueMemImageNDCopy (CUstream cu_stream, pi_mem_type img_type, const size_t *region, const void *src_ptr, const CUmemorytype_enum src_type, const size_t *src_offset, void *dst_ptr, const CUmemorytype_enum dst_type, const size_t *dst_offset)
 General ND memory copy operation for images (where N > 1). More...
 
pi_result cuda_piEnqueueMemImageRead (pi_queue command_queue, pi_mem image, pi_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageWrite (pi_queue command_queue, pi_mem image, pi_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageCopy (pi_queue command_queue, pi_mem src_image, pi_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageFill (pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *)
 \TODO Not implemented in CUDA, requires untie from OpenCL More...
 
pi_result cuda_piEnqueueMemBufferMap (pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
 Implements mapping on the host using a BufferRead operation. More...
 
pi_result cuda_piEnqueueMemUnmap (pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Implements the unmap from the host, using a BufferWrite operation. More...
 
pi_result cuda_piextUSMHostAlloc (void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM Host allocations using CUDA Pinned Memory. More...
 
pi_result cuda_piextUSMDeviceAlloc (void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM device allocations using a normal CUDA device pointer. More...
 
pi_result cuda_piextUSMSharedAlloc (void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
 USM: Implements USM Shared allocations using CUDA Managed Memory. More...
 
pi_result cuda_piextUSMFree (pi_context context, void *ptr)
 USM: Frees the given USM pointer associated with the context. More...
 
pi_result cuda_piextUSMEnqueueMemset (pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemcpy (pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueuePrefetch (pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemAdvise (pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
 USM: memadvise API to govern behavior of automatic migration mechanisms. More...
 
pi_result cuda_piextUSMGetMemAllocInfo (pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value; PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if the queried pointer fell inside an allocation. More...
 
pi_result cuda_piTearDown (void *)
 
pi_result piPluginInit (pi_plugin *PluginInit)
 

Variables

const char SupportedVersion [] = _PI_H_VERSION_STRING
 

Detailed Description

Implementation of CUDA Plugin.

Definition in file pi_cuda.cpp.

Macro Definition Documentation

◆ _PI_CL

#define _PI_CL (   pi_api,
  cuda_api 
)    (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);

Function Documentation

◆ commonEnqueueMemBufferCopyRect()

static pi_result commonEnqueueMemBufferCopyRect ( CUstream  cu_stream,
pi_buff_rect_region  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
pi_buff_rect_offset  src_offset,
size_t  src_row_pitch,
size_t  src_slice_pitch,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
pi_buff_rect_offset  dst_offset,
size_t  dst_row_pitch,
size_t  dst_slice_pitch 
)
static

General 3D memory copy operation.

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is on the device, src_ptr and/or dst_ptr must be a pointer to a CUdeviceptr.

Definition at line 3869 of file pi_cuda.cpp.

References pi_buff_rect_region_struct::depth_scalar, pi_buff_rect_region_struct::height_scalar, pi_buff_rect_region_struct::width_bytes, pi_buff_rect_offset_struct::x_bytes, pi_buff_rect_offset_struct::y_scalar, and pi_buff_rect_offset_struct::z_scalar.

Referenced by cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferReadRect(), and cuda_piEnqueueMemBufferWriteRect().

◆ commonEnqueueMemImageNDCopy()

static pi_result commonEnqueueMemImageNDCopy ( CUstream  cu_stream,
pi_mem_type  img_type,
const size_t *  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
const size_t *  src_offset,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
const size_t *  dst_offset 
)
static

General ND memory copy operation for images (where N > 1).

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is an array, src_ptr and/or dst_ptr must be a pointer to a CUarray.

Definition at line 4241 of file pi_cuda.cpp.

References PI_INVALID_VALUE, PI_MEM_TYPE_IMAGE2D, and PI_MEM_TYPE_IMAGE3D.

Referenced by cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), and cuda_piEnqueueMemImageWrite().

◆ cuda_piclProgramCreateWithSource()

pi_result cuda_piclProgramCreateWithSource ( pi_context  ,
pi_uint32  ,
const char **  ,
const size_t *  ,
pi_program  
)

Not used as CUDA backend only creates programs from binary.

See cuda_piProgramCreateWithBinary.

Definition at line 3075 of file pi_cuda.cpp.

References cl::sycl::detail::pi::cuPrint(), and PI_INVALID_OPERATION.

Referenced by piPluginInit().

◆ cuda_piContextCreate()

pi_result cuda_piContextCreate ( const pi_context_properties properties,
pi_uint32  num_devices,
const pi_device devices,
void(*)(const char *errinfo, const void *private_info, size_t cb, void *user_data)  pfn_notify,
void *  user_data,
pi_context retcontext 
)

Create a PI CUDA context.

By default creates a scoped context and keeps the last active CUDA context on top of the CUDA context stack. With the __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE creates a primary CUDA context and activates it on the CUDA context stack.

Parameters
[in] properties: 0-terminated array of key/id-value combinations. Can be nullptr. Only accepts property key/id __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY with a pi_bool value.
[in] num_devices: Number of devices to create the context for.
[in] devices: Devices to create the context for.
[in] pfn_notify: Callback, currently unused.
[in] user_data: User data for callback.
[out] retcontext: Set to created context on success.
Returns
PI_SUCCESS on success, otherwise an error return code.

Definition at line 1886 of file pi_cuda.cpp.

References __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, cl::sycl::detail::pi::die(), _pi_platform::evBase_, std::get(), PI_FALSE, PI_INVALID_VALUE, PI_OUT_OF_RESOURCES, PI_SUCCESS, PI_TRUE, _pi_context::primary, and _pi_context::user_defined.

Referenced by piPluginInit().

◆ cuda_piContextGetInfo()

◆ cuda_piContextRelease()

◆ cuda_piContextRetain()

pi_result cuda_piContextRetain ( pi_context  context)

Definition at line 959 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piDeviceGetInfo()

pi_result cuda_piDeviceGetInfo ( pi_device  device,
pi_device_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Definition at line 1039 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, cl::sycl::detail::pi::assertion(), cl::sycl::detail::pi::cuPrint(), cl::sycl::detail::pi::die(), min(), PI_DEVICE_INFO_ADDRESS_BITS, PI_DEVICE_INFO_ATOMIC_64, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, PI_DEVICE_INFO_AVAILABLE, PI_DEVICE_INFO_BACKEND_VERSION, PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, PI_DEVICE_INFO_BUILT_IN_KERNELS, PI_DEVICE_INFO_COMPILER_AVAILABLE, PI_DEVICE_INFO_DOUBLE_FP_CONFIG, PI_DEVICE_INFO_DRIVER_VERSION, PI_DEVICE_INFO_ENDIAN_LITTLE, PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, PI_DEVICE_INFO_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT, PI_DEVICE_INFO_EXTENSIONS, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_SIZE, PI_DEVICE_INFO_GPU_EU_COUNT, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, PI_DEVICE_INFO_GPU_SLICES, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, PI_DEVICE_INFO_HALF_FP_CONFIG, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_SUPPORT, PI_DEVICE_INFO_LINKER_AVAILABLE, PI_DEVICE_INFO_LOCAL_MEM_SIZE, PI_DEVICE_INFO_LOCAL_MEM_TYPE, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_MAX_COMPUTE_UNITS, PI_DEVICE_INFO_MAX_CONSTANT_ARGS, PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, PI_DEVICE_INFO_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_SAMPLERS, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, 
PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, PI_DEVICE_INFO_NAME, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_OPENCL_C_VERSION, PI_DEVICE_INFO_PARENT_DEVICE, PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, PI_DEVICE_INFO_PARTITION_PROPERTIES, PI_DEVICE_INFO_PARTITION_TYPE, PI_DEVICE_INFO_PCI_ADDRESS, PI_DEVICE_INFO_PLATFORM, PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, PI_DEVICE_INFO_PROFILE, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES, PI_DEVICE_INFO_REFERENCE_COUNT, PI_DEVICE_INFO_SINGLE_FP_CONFIG, PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PI_DEVICE_INFO_TYPE, PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, PI_DEVICE_INFO_USM_DEVICE_SUPPORT, PI_DEVICE_INFO_USM_HOST_SUPPORT, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, PI_DEVICE_INFO_UUID, PI_DEVICE_INFO_VENDOR, PI_DEVICE_INFO_VENDOR_ID, PI_DEVICE_INFO_VERSION, PI_DEVICE_LOCAL_MEM_TYPE_LOCAL, PI_DEVICE_TYPE_GPU, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, PI_FALSE, PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, PI_FP_DENORM, PI_FP_FMA, PI_FP_INF_NAN, PI_FP_ROUND_TO_INF, PI_FP_ROUND_TO_NEAREST, PI_FP_ROUND_TO_ZERO, PI_INVALID_VALUE, PI_MEMORY_ORDER_ACQ_REL, 
PI_MEMORY_ORDER_ACQUIRE, PI_MEMORY_ORDER_RELAXED, PI_MEMORY_ORDER_RELEASE, PI_MEMORY_SCOPE_DEVICE, PI_MEMORY_SCOPE_SUB_GROUP, PI_MEMORY_SCOPE_SYSTEM, PI_MEMORY_SCOPE_WORK_GROUP, PI_MEMORY_SCOPE_WORK_ITEM, PI_TRUE, PI_USM_ACCESS, PI_USM_ATOMIC_ACCESS, PI_USM_CONCURRENT_ACCESS, and PI_USM_CONCURRENT_ATOMIC_ACCESS.

Referenced by cuda_piPlatformsGet(), and piPluginInit().

◆ cuda_piDevicePartition()

pi_result cuda_piDevicePartition ( pi_device  ,
const cl_device_partition_property *  ,
pi_uint32  ,
pi_device ,
pi_uint32  
)

Not applicable to CUDA, devices cannot be partitioned.

TODO: untie cl_device_partition_property from OpenCL

Definition at line 976 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piDeviceRelease()

pi_result cuda_piDeviceRelease ( pi_device  )
Returns
PI_SUCCESS always since CUDA devices are always root devices.

Definition at line 1037 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit(), _pi_context::~_pi_context(), and _pi_queue::~_pi_queue().

◆ cuda_piDeviceRetain()

pi_result cuda_piDeviceRetain ( pi_device  )
Returns
PI_SUCCESS if the function is executed successfully. CUDA devices are always root devices, so retain always returns success.

Definition at line 915 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piDevicesGet()

pi_result cuda_piDevicesGet ( pi_platform  platform,
pi_device_type  device_type,
pi_uint32  num_entries,
pi_device devices,
pi_uint32 num_devices 
)
Parameters
devices: List of devices available on the system.
num_devices: Number of elements in the list of devices. Requesting a non-GPU device triggers an error; all PI CUDA devices are GPUs.

Definition at line 883 of file pi_cuda.cpp.

References PI_DEVICE_TYPE_DEFAULT, PI_DEVICE_TYPE_GPU, PI_OUT_OF_RESOURCES, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piEnqueueEventsWait()

pi_result cuda_piEnqueueEventsWait ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

Enqueues a wait on the given CUstream for all events.

See enqueueEventWait. TODO: Add support for multiple streams once the Event class is properly refactored.

Definition at line 3644 of file pi_cuda.cpp.

References cuda_piEnqueueEventsWaitWithBarrier().

Referenced by cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemUnmap(), and piPluginInit().

◆ cuda_piEnqueueEventsWaitWithBarrier()

pi_result cuda_piEnqueueEventsWaitWithBarrier ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue.

Parameters
[in] command_queue: A valid PI queue.
[in] num_events_in_wait_list: Number of events in event_wait_list.
[in] event_wait_list: Events to wait on.
[out] event: Event for when all events in event_wait_list have finished or, if event_wait_list is empty, when all previous events in the queue have finished.
Returns
TBD

Definition at line 3664 of file pi_cuda.cpp.

References enqueueEventWait(), _pi_queue::get_context(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_MARKER, PI_ERROR_UNKNOWN, PI_INVALID_QUEUE, and PI_SUCCESS.

Referenced by cuda_piEnqueueEventsWait(), and piPluginInit().

◆ cuda_piEnqueueKernelLaunch()

pi_result cuda_piEnqueueKernelLaunch ( pi_queue  command_queue,
pi_kernel  kernel,
pi_uint32  work_dim,
const size_t *  global_work_offset,
const size_t *  global_work_size,
const size_t *  local_work_size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopy()

pi_result cuda_piEnqueueMemBufferCopy ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
size_t  src_offset,
size_t  dst_offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopyRect()

pi_result cuda_piEnqueueMemBufferCopyRect ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
pi_buff_rect_offset  src_origin,
pi_buff_rect_offset  dst_origin,
pi_buff_rect_region  region,
size_t  src_row_pitch,
size_t  src_slice_pitch,
size_t  dst_row_pitch,
size_t  dst_slice_pitch,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferFill()

pi_result cuda_piEnqueueMemBufferFill ( pi_queue  command_queue,
pi_mem  buffer,
const void *  pattern,
size_t  pattern_size,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferMap()

pi_result cuda_piEnqueueMemBufferMap ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_map,
pi_map_flags  map_flags,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event,
void **  ret_map 
)

Implements mapping on the host using a BufferRead operation.

Mapped pointers are stored in the pi_mem object. If the buffer uses pinned host memory, a pointer to that memory is returned and no read operation is done. TODO: Untie types from OpenCL.

Definition at line 4522 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::allocMode_, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_, cuda_piEnqueueEventsWait(), cuda_piEnqueueMemBufferRead(), _pi_queue::get_context(), _pi_mem::mem_::buffer_mem_::get_map_ptr(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_::buffer_mem_::map_to_ptr(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_MEM_BUFFER_MAP, PI_INVALID_OPERATION, PI_MAP_READ, PI_MAP_WRITE, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piEnqueueMemBufferRead()

pi_result cuda_piEnqueueMemBufferRead ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
size_t  offset,
size_t  size,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferReadRect()

pi_result cuda_piEnqueueMemBufferReadRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWrite()

pi_result cuda_piEnqueueMemBufferWrite ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
size_t  offset,
size_t  size,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWriteRect()

pi_result cuda_piEnqueueMemBufferWriteRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageCopy()

pi_result cuda_piEnqueueMemImageCopy ( pi_queue  command_queue,
pi_mem  src_image,
pi_mem  dst_image,
const size_t *  src_origin,
const size_t *  dst_origin,
const size_t *  region,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageFill()

pi_result cuda_piEnqueueMemImageFill ( pi_queue  ,
pi_mem  ,
const void *  ,
const size_t *  ,
const size_t *  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented in CUDA; requires untying from OpenCL.

Definition at line 4509 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEnqueueMemImageRead()

pi_result cuda_piEnqueueMemImageRead ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_read,
const size_t *  origin,
const size_t *  region,
size_t  row_pitch,
size_t  slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageWrite()

pi_result cuda_piEnqueueMemImageWrite ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_write,
const size_t *  origin,
const size_t *  region,
size_t  input_row_pitch,
size_t  input_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemUnmap()

pi_result cuda_piEnqueueMemUnmap ( pi_queue  command_queue,
pi_mem  memobj,
void *  mapped_ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueNativeKernel()

pi_result cuda_piEnqueueNativeKernel ( pi_queue  ,
void(*)(void *)  ,
void *  ,
size_t  ,
pi_uint32  ,
const pi_mem ,
const void **  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented.

Definition at line 2888 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventCreate()

pi_result cuda_piEventCreate ( pi_context  ,
pi_event  
)

Definition at line 3526 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventGetInfo()

pi_result cuda_piEventGetInfo ( pi_event  event,
pi_event_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piEventGetProfilingInfo()

pi_result cuda_piEventGetProfilingInfo ( pi_event  event,
pi_profiling_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Obtains profiling information from PI CUDA events. TODO: Untie from OpenCL; timings from CUDA are only elapsed time.

Definition at line 3561 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, cl::sycl::detail::pi::die(), PI_PROFILING_INFO_COMMAND_END, PI_PROFILING_INFO_COMMAND_QUEUED, PI_PROFILING_INFO_COMMAND_START, PI_PROFILING_INFO_COMMAND_SUBMIT, PI_PROFILING_INFO_NOT_AVAILABLE, and PI_QUEUE_PROFILING_ENABLE.

Referenced by piPluginInit().

◆ cuda_piEventRelease()

pi_result cuda_piEventRelease ( pi_event  event)

◆ cuda_piEventRetain()

pi_result cuda_piEventRetain ( pi_event  event)

Definition at line 3602 of file pi_cuda.cpp.

References cl::sycl::detail::pi::assertion(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piEventSetCallback()

pi_result cuda_piEventSetCallback ( pi_event  ,
pi_int32  ,
pfn_notify  ,
void *   
)

Definition at line 3592 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piEventSetStatus()

pi_result cuda_piEventSetStatus ( pi_event  ,
pi_int32   
)

Definition at line 3597 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die(), and PI_INVALID_VALUE.

Referenced by piPluginInit().

◆ cuda_piEventsWait()

pi_result cuda_piEventsWait ( pi_uint32  num_events,
const pi_event event_list 
)

◆ cuda_piextContextCreateWithNativeHandle()

pi_result cuda_piextContextCreateWithNativeHandle ( pi_native_handle  ,
pi_uint32  ,
const pi_device ,
bool  ,
pi_context  
)

Creates a PI context object from a CUDA context handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI context object from.
[out] context: Set to the PI context object created from the native handle.
Returns
TBD

Definition at line 2028 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextContextGetNativeHandle()

pi_result cuda_piextContextGetNativeHandle ( pi_context  context,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI context object.

Parameters
[in] context: The PI context to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI context object.
Returns
PI_SUCCESS

Definition at line 2014 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextContextSetExtendedDeleter()

pi_result cuda_piextContextSetExtendedDeleter ( pi_context  context,
pi_context_extended_deleter  function,
void *  user_data 
)

Definition at line 967 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextDeviceCreateWithNativeHandle()

pi_result cuda_piextDeviceCreateWithNativeHandle ( pi_native_handle  ,
pi_platform  ,
pi_device  
)

Creates a PI device object from a CUDA device handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI device object from.
[in] platform: The PI platform of the device.
[out] device: Set to the PI device object created from the native handle.
Returns
TBD

Definition at line 1859 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextDeviceGetNativeHandle()

pi_result cuda_piextDeviceGetNativeHandle ( pi_device  device,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI device object.

Parameters
[in] device: The PI device to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI device object.
Returns
PI_SUCCESS

Definition at line 1844 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextDeviceSelectBinary()

pi_result cuda_piextDeviceSelectBinary ( pi_device  device,
pi_device_binary binaries,
pi_uint32  num_binaries,
pi_uint32 selected_binary 
)
Returns
If available, the first binary that is PTX

Definition at line 984 of file pi_cuda.cpp.

References __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64, cl::sycl::detail::pi::die(), PI_INVALID_BINARY, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextEventCreateWithNativeHandle()

pi_result cuda_piextEventCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
pi_event  
)

Creates a PI event object from a CUDA event handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI event object from.
[out] event: Set to the PI event object created from the native handle.
Returns
TBD

Definition at line 3722 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextEventGetNativeHandle()

pi_result cuda_piextEventGetNativeHandle ( pi_event  event,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI event object.

Parameters
[in] event: The PI event to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI event object.
Returns
PI_SUCCESS on success. PI_INVALID_EVENT if given a user event.

Definition at line 3708 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextGetDeviceFunctionPointer()

pi_result cuda_piextGetDeviceFunctionPointer ( pi_device  device,
pi_program  program,
const char func_name,
pi_uint64 func_pointer_ret 
)

◆ cuda_piextKernelCreateWithNativeHandle()

pi_result cuda_piextKernelCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
pi_program  ,
bool  ,
pi_kernel  
)

Definition at line 2895 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgMemObj()

◆ cuda_piextKernelSetArgPointer()

pi_result cuda_piextKernelSetArgPointer ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 3516 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgSampler()

pi_result cuda_piextKernelSetArgSampler ( pi_kernel  kernel,
pi_uint32  arg_index,
const pi_sampler arg_value 
)

Definition at line 2672 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextMemCreateWithNativeHandle()

pi_result cuda_piextMemCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_context  context,
bool  ownNativeHandle,
pi_mem mem 
)

Creates a PI mem object from a CUDA mem handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI mem object from.
[in] context: The PI context of the memory allocation.
[in] ownNativeHandle: Indicates whether we own the native memory handle or whether it came from interop that asked not to transfer ownership to the SYCL RT.
[out] mem: Set to the PI mem object created from the native handle.
Returns
TBD

Definition at line 2270 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextMemGetNativeHandle()

pi_result cuda_piextMemGetNativeHandle ( pi_mem  mem,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI mem object.

Parameters
[in] mem: The PI mem to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI mem object.
Returns
PI_SUCCESS

Definition at line 2253 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_mem::mem_, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextProgramCreateWithNativeHandle()

pi_result cuda_piextProgramCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
pi_program  
)

Creates a PI program object from a CUDA program handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI program object from.
[in] context: The PI context of the program.
[out] program: Set to the PI program object created from the native handle.
Returns
TBD

Definition at line 3379 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextProgramGetNativeHandle()

pi_result cuda_piextProgramGetNativeHandle ( pi_program  program,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI program object.

Parameters
[in] program: The PI program to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI program object.
Returns
TBD

Definition at line 3364 of file pi_cuda.cpp.

References _pi_program::get(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextProgramSetSpecializationConstant()

pi_result cuda_piextProgramSetSpecializationConstant ( pi_program  ,
pi_uint32  ,
size_t  ,
const void *   
)

Definition at line 3507 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextQueueCreateWithNativeHandle()

pi_result cuda_piextQueueCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
pi_device  ,
bool  ownNativeHandle,
pi_queue  
)

Creates a PI queue object from a CUDA queue handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create the PI queue object from.
[in] context: The PI context of the queue.
[out] queue: Set to the PI queue object created from the native handle.
ownNativeHandle: Tells whether the SYCL RT should assume ownership of the native handle, if it can.
Returns
TBD

Definition at line 2445 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextQueueGetNativeHandle()

pi_result cuda_piextQueueGetNativeHandle ( pi_queue  queue,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI queue object.

Parameters
[in] queue: The PI queue to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI queue object.
Returns
PI_SUCCESS

Definition at line 2426 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMDeviceAlloc()

pi_result cuda_piextUSMDeviceAlloc ( void **  result_ptr,
pi_context  context,
pi_device  device,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM device allocations using a normal CUDA device pointer.

Definition at line 4659 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueMemAdvise()

◆ cuda_piextUSMEnqueueMemcpy()

pi_result cuda_piextUSMEnqueueMemcpy ( pi_queue  queue,
pi_bool  blocking,
void *  dst_ptr,
const void *  src_ptr,
size_t  size,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

Definition at line 4767 of file pi_cuda.cpp.

References _pi_event::make_native(), PI_COMMAND_TYPE_MEM_BUFFER_COPY, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueMemset()

pi_result cuda_piextUSMEnqueueMemset ( pi_queue  queue,
void *  ptr,
pi_int32  value,
size_t  count,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

Definition at line 4735 of file pi_cuda.cpp.

References _pi_event::make_native(), PI_COMMAND_TYPE_MEM_BUFFER_FILL, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueuePrefetch()

pi_result cuda_piextUSMEnqueuePrefetch ( pi_queue  queue,
const void *  ptr,
size_t  size,
pi_usm_migration_flags  flags,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMFree()

pi_result cuda_piextUSMFree ( pi_context  context,
void *  ptr 
)

USM: Frees the given USM pointer associated with the context.

Definition at line 4708 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMGetMemAllocInfo()

pi_result cuda_piextUSMGetMemAllocInfo ( pi_context  context,
const void *  ptr,
pi_mem_alloc_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

API to query information about USM-allocated pointers. Valid queries:

  • PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value.
  • PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer fell inside an allocation. The result must fit in void *.
  • PI_MEM_ALLOC_SIZE returns how big the queried pointer's allocation is in bytes. The result is a size_t.
  • PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against.

Parameters
context: The pi_context.
ptr: The pointer to query.
param_name: The type of query to perform.
param_value_size: The size of the result in bytes.
param_value: The result.
param_value_size_ret: How many bytes were written.

Definition at line 4942 of file pi_cuda.cpp.

References cuda_piPlatformsGet(), PI_INVALID_VALUE, PI_MEM_ALLOC_BASE_PTR, PI_MEM_ALLOC_DEVICE, PI_MEM_ALLOC_SIZE, PI_MEM_ALLOC_TYPE, PI_MEM_TYPE_DEVICE, PI_MEM_TYPE_HOST, PI_MEM_TYPE_SHARED, PI_MEM_TYPE_UNKNOWN, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMHostAlloc()

pi_result cuda_piextUSMHostAlloc ( void **  result_ptr,
pi_context  context,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM Host allocations using CUDA Pinned Memory.

Definition at line 4637 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piextUSMSharedAlloc()

pi_result cuda_piextUSMSharedAlloc ( void **  result_ptr,
pi_context  context,
pi_device  device,
pi_usm_mem_properties properties,
size_t  size,
pi_uint32  alignment 
)

USM: Implements USM Shared allocations using CUDA Managed Memory.

Definition at line 4683 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piKernelCreate()

pi_result cuda_piKernelCreate ( pi_program  program,
const char kernel_name,
pi_kernel kernel 
)

Definition at line 2582 of file pi_cuda.cpp.

References _pi_program::get(), _pi_program::get_context(), PI_OUT_OF_HOST_MEMORY, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piKernelGetGroupInfo()

◆ cuda_piKernelGetInfo()

pi_result cuda_piKernelGetInfo ( pi_kernel  kernel,
pi_kernel_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelGetSubGroupInfo()

pi_result cuda_piKernelGetSubGroupInfo ( pi_kernel  kernel,
pi_device  device,
pi_kernel_sub_group_info  param_name,
size_t  input_value_size,
const void *  input_value,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelRelease()

pi_result cuda_piKernelRelease ( pi_kernel  kernel)

Definition at line 3483 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piKernelRetain()

pi_result cuda_piKernelRetain ( pi_kernel  kernel)

Definition at line 3475 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piKernelSetArg()

pi_result cuda_piKernelSetArg ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 2622 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piKernelSetExecInfo()

pi_result cuda_piKernelSetExecInfo ( pi_kernel  ,
pi_kernel_exec_info  ,
size_t  ,
const void *   
)

Definition at line 3502 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piMemBufferCreate()

pi_result cuda_piMemBufferCreate ( pi_context  context,
pi_mem_flags  flags,
size_t  size,
void *  host_ptr,
pi_mem ret_mem,
const pi_mem_properties properties 
)

Creates a PI Memory object using a CUDA memory allocation.

Can trigger a manual copy depending on the mode. TODO: Implement USE_HOST_PTR using cuHostRegister.

Definition at line 2040 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, PI_MEM_FLAGS_HOST_PTR_ALLOC, PI_MEM_FLAGS_HOST_PTR_COPY, PI_MEM_FLAGS_HOST_PTR_USE, PI_OUT_OF_HOST_MEMORY, PI_OUT_OF_RESOURCES, PI_SUCCESS, and _pi_mem::mem_::buffer_mem_::use_host_ptr.

Referenced by piPluginInit().

◆ cuda_piMemBufferPartition()

pi_result cuda_piMemBufferPartition ( pi_mem  parent_buffer,
pi_mem_flags  flags,
pi_buffer_create_type  buffer_create_type,
void *  buffer_create_info,
pi_mem memObj 
)

Implements a buffer partition in the CUDA backend.

A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented as an offset over an existing CUDA allocation.

Definition at line 2179 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::context_, ReleaseGuard< T >::dismiss(), _pi_mem::mem_::buffer_mem_::get_size(), _pi_mem::mem_::buffer_mem_::hostPtr_, _pi_mem::is_buffer(), _pi_mem::is_sub_buffer(), _pi_mem::mem_, PI_BUFFER_CREATE_TYPE_REGION, PI_MEM_FLAGS_ACCESS_RW, PI_OUT_OF_HOST_MEMORY, PI_SUCCESS, and _pi_mem::mem_::buffer_mem_::ptr_.

Referenced by piPluginInit().

◆ cuda_piMemGetInfo()

pi_result cuda_piMemGetInfo ( pi_mem  ,
pi_mem_info  ,
size_t  ,
void *  ,
size_t *   
)

Definition at line 2243 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemImageCreate()

◆ cuda_piMemImageGetInfo()

pi_result cuda_piMemImageGetInfo ( pi_mem  ,
pi_image_info  ,
size_t  ,
void *  ,
size_t *   
)

TODO: Not implemented.

Definition at line 3059 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemRelease()

pi_result cuda_piMemRelease ( pi_mem  memObj)

◆ cuda_piMemRetain()

pi_result cuda_piMemRetain ( pi_mem  mem)

◆ cuda_piPlatformGetInfo()

pi_result cuda_piPlatformGetInfo ( pi_platform  platform,
pi_platform_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piPlatformsGet()

pi_result cuda_piPlatformsGet ( pi_uint32  num_entries,
pi_platform platforms,
pi_uint32 num_platforms 
)

Obtains the CUDA platform.

There is only one CUDA platform, and it contains all devices on the system. Triggers the CUDA Driver initialization (cuInit) the first time, so this must be the first PI API called.

However, because multiple devices in a context are not currently supported, each device is placed in a separate platform.

Definition at line 750 of file pi_cuda.cpp.

References cuda_piDeviceGetInfo(), PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, PI_INVALID_VALUE, PI_OUT_OF_HOST_MEMORY, PI_OUT_OF_RESOURCES, and PI_SUCCESS.

Referenced by cuda_piextUSMGetMemAllocInfo(), and piPluginInit().

◆ cuda_piProgramBuild()

pi_result cuda_piProgramBuild ( pi_program  program,
pi_uint32  num_devices,
const pi_device device_list,
const char options,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data 
)

Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels).

See _pi_program for implementation details.

Definition at line 3086 of file pi_cuda.cpp.

References _pi_program::build_program(), _pi_program::get_context(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piProgramCompile()

pi_result cuda_piProgramCompile ( pi_program  program,
pi_uint32  num_devices,
const pi_device device_list,
const char options,
pi_uint32  num_input_headers,
const pi_program input_headers,
const char **  header_include_names,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data 
)

Creates a new program that is the outcome of the compilation of the headers and the program.

TODO: Implement asynchronous compilation.

Definition at line 3264 of file pi_cuda.cpp.

References _pi_program::build_program(), _pi_program::get_context(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piProgramCreate()

pi_result cuda_piProgramCreate ( pi_context  ,
const void *  ,
size_t  ,
pi_program  
)

TODO: Not implemented.

Definition at line 3111 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piProgramCreateWithBinary()

pi_result cuda_piProgramCreateWithBinary ( pi_context  context,
pi_uint32  num_devices,
const pi_device device_list,
const size_t *  lengths,
const unsigned char **  binaries,
size_t  num_metadata_entries,
const pi_device_binary_property metadata,
pi_int32 binary_status,
pi_program program 
)

Loads images from a list of PTX or CUBIN binaries.

Note: No calls to CUDA driver API in this function, only store binaries for later.

Note: Only supports one device

Definition at line 3122 of file pi_cuda.cpp.

References _pi_device::get(), cl::sycl::length(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piProgramGetBuildInfo()

pi_result cuda_piProgramGetBuildInfo ( pi_program  program,
pi_device  device,
cl_program_build_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piProgramGetInfo()

◆ cuda_piProgramLink()

pi_result cuda_piProgramLink ( pi_context  context,
pi_uint32  num_devices,
const pi_device device_list,
const char options,
pi_uint32  num_input_programs,
const pi_program input_programs,
void(*)(pi_program program, void *user_data)  pfn_notify,
void *  user_data,
pi_program ret_program 
)

Creates a new PI program object that is the outcome of linking all input programs.

TODO: Implement linker options; this requires a mapping of OpenCL options to CUDA.

Definition at line 3200 of file pi_cuda.cpp.

References _pi_program::binary_, _pi_program::binarySizeInBytes_, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piProgramRelease()

pi_result cuda_piProgramRelease ( pi_program  program)

Decreases the reference count of a pi_program object.

When the reference count reaches 0, it unloads the module from the context.

Definition at line 3329 of file pi_cuda.cpp.

References _pi_program::decrement_reference_count(), _pi_program::get(), _pi_program::get_context(), _pi_program::get_reference_count(), PI_INVALID_PROGRAM, PI_OUT_OF_RESOURCES, and PI_SUCCESS.

Referenced by piPluginInit(), and _pi_kernel::~_pi_kernel().

◆ cuda_piProgramRetain()

pi_result cuda_piProgramRetain ( pi_program  program)

◆ cuda_piQueueCreate()

pi_result cuda_piQueueCreate ( pi_context  context,
pi_device  device,
pi_queue_properties  properties,
pi_queue queue 
)

Creates a pi_queue object on the CUDA backend.

Valid properties

  • __SYCL_PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT
  • __SYCL_PI_CUDA_SYNC_WITH_DEFAULT -> CU_STREAM_NON_BLOCKING

Returns
PI queue object mapping to a CUstream

Definition at line 2285 of file pi_cuda.cpp.

References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, _pi_queue::default_num_compute_streams, _pi_queue::default_num_transfer_streams, PI_INVALID_DEVICE, PI_OUT_OF_RESOURCES, PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piQueueFinish()

pi_result cuda_piQueueFinish ( pi_queue  command_queue)

◆ cuda_piQueueFlush()

pi_result cuda_piQueueFlush ( pi_queue  command_queue)

Definition at line 2415 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piQueueGetInfo()

pi_result cuda_piQueueGetInfo ( pi_queue  command_queue,
pi_queue_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piQueueRelease()

◆ cuda_piQueueRetain()

pi_result cuda_piQueueRetain ( pi_queue  command_queue)

◆ cuda_piSamplerCreate()

pi_result cuda_piSamplerCreate ( pi_context  context,
const pi_sampler_properties sampler_properties,
pi_sampler result_sampler 
)

Creates a PI sampler object.

Parameters
[in] context: The context the sampler is created for.
[in] sampler_properties: The properties for the sampler.
[out] result_sampler: Set to the resulting sampler object.
Returns
PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property or if there are multiple properties from the same category.

Definition at line 3737 of file pi_cuda.cpp.

References PI_INVALID_VALUE, PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, PI_SAMPLER_PROPERTIES_FILTER_MODE, PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piSamplerGetInfo()

pi_result cuda_piSamplerGetInfo ( pi_sampler  sampler,
cl_sampler_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Gets information from a PI sampler object.

Parameters
[in] sampler: The sampler to get the information from.
[in] param_name: The name of the information to get.
[in] param_value_size: The size of the param_value.
[out] param_value: Set to the information value.
[out] param_value_size_ret: Set to the size of the information value.
Returns
PI_SUCCESS on success.

Definition at line 3794 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_sampler::context_, _pi_sampler::get_reference_count(), PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_INFO_ADDRESSING_MODE, PI_SAMPLER_INFO_CONTEXT, PI_SAMPLER_INFO_FILTER_MODE, PI_SAMPLER_INFO_NORMALIZED_COORDS, PI_SAMPLER_INFO_REFERENCE_COUNT, and _pi_sampler::props_.

Referenced by piPluginInit().

◆ cuda_piSamplerRelease()

pi_result cuda_piSamplerRelease ( pi_sampler  sampler)

Releases a PI sampler object, decrementing its reference count.

If the reference count reaches zero, the sampler object is destroyed.

Parameters
[in] sampler: The sampler to decrement the reference count of.
Returns
PI_SUCCESS.

Definition at line 3847 of file pi_cuda.cpp.

References cl::sycl::detail::pi::assertion(), _pi_sampler::decrement_reference_count(), _pi_sampler::get_reference_count(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piSamplerRetain()

pi_result cuda_piSamplerRetain ( pi_sampler  sampler)

Retains a PI sampler object, incrementing its reference count.

Parameters
[in] sampler: The sampler to increment the reference count of.
Returns
PI_SUCCESS.

Definition at line 3835 of file pi_cuda.cpp.

References _pi_sampler::increment_reference_count(), and PI_SUCCESS.

Referenced by piPluginInit().

◆ cuda_piTearDown()

pi_result cuda_piTearDown ( void *  )

Definition at line 5043 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by piPluginInit().

◆ enqueueEventWait()

pi_result enqueueEventWait ( pi_queue  queue,
pi_event  event 
)

Definition at line 547 of file pi_cuda.cpp.

References PI_SUCCESS.

Referenced by cuda_piEnqueueEventsWaitWithBarrier().

◆ get_kernel_metadata()

bool get_kernel_metadata ( std::string  metadataName,
const char *  tag,
std::string &  kernelName 
)

Definition at line 565 of file pi_cuda.cpp.

Referenced by _pi_program::set_metadata().

◆ getKernelNames()

std::string getKernelNames ( pi_program  )

Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this.

Note: This is currently used only by the SYCL program class for the has_kernel method, so an alternative would be to move the has_kernel query to PI and use cuModuleGetFunction to check for a kernel.

Note: Another alternative is to add kernel names as metadata, as is done for reqd_work_group_size.

Definition at line 655 of file pi_cuda.cpp.

References cl::sycl::detail::pi::die().

Referenced by cuda_piProgramGetInfo().

◆ imageElementByteSize()

static size_t imageElementByteSize ( CUDA_ARRAY_DESCRIPTOR  array_desc)
static

◆ piPluginInit()

pi_result piPluginInit ( pi_plugin *  PluginInit)

Definition at line 5047 of file pi_cuda.cpp.

References _PI_CL, cuda_piclProgramCreateWithSource(), cuda_piContextCreate(), cuda_piContextGetInfo(), cuda_piContextRelease(), cuda_piContextRetain(), cuda_piDeviceGetInfo(), cuda_piDevicePartition(), cuda_piDeviceRelease(), cuda_piDeviceRetain(), cuda_piDevicesGet(), cuda_piEnqueueEventsWait(), cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageFill(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piEnqueueNativeKernel(), cuda_piEventCreate(), cuda_piEventGetInfo(), cuda_piEventGetProfilingInfo(), cuda_piEventRelease(), cuda_piEventRetain(), cuda_piEventSetCallback(), cuda_piEventSetStatus(), cuda_piEventsWait(), cuda_piextContextCreateWithNativeHandle(), cuda_piextContextGetNativeHandle(), cuda_piextContextSetExtendedDeleter(), cuda_piextDeviceCreateWithNativeHandle(), cuda_piextDeviceGetNativeHandle(), cuda_piextDeviceSelectBinary(), cuda_piextEventCreateWithNativeHandle(), cuda_piextEventGetNativeHandle(), cuda_piextGetDeviceFunctionPointer(), cuda_piextKernelCreateWithNativeHandle(), cuda_piextKernelSetArgMemObj(), cuda_piextKernelSetArgPointer(), cuda_piextKernelSetArgSampler(), cuda_piextMemCreateWithNativeHandle(), cuda_piextMemGetNativeHandle(), cuda_piextProgramCreateWithNativeHandle(), cuda_piextProgramGetNativeHandle(), cuda_piextProgramSetSpecializationConstant(), cuda_piextQueueCreateWithNativeHandle(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMDeviceAlloc(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemset(), cuda_piextUSMEnqueuePrefetch(), cuda_piextUSMFree(), cuda_piextUSMGetMemAllocInfo(), cuda_piextUSMHostAlloc(), 
cuda_piextUSMSharedAlloc(), cuda_piKernelCreate(), cuda_piKernelGetGroupInfo(), cuda_piKernelGetInfo(), cuda_piKernelGetSubGroupInfo(), cuda_piKernelRelease(), cuda_piKernelRetain(), cuda_piKernelSetArg(), cuda_piKernelSetExecInfo(), cuda_piMemBufferCreate(), cuda_piMemBufferPartition(), cuda_piMemGetInfo(), cuda_piMemImageCreate(), cuda_piMemImageGetInfo(), cuda_piMemRelease(), cuda_piMemRetain(), cuda_piPlatformGetInfo(), cuda_piPlatformsGet(), cuda_piProgramBuild(), cuda_piProgramCompile(), cuda_piProgramCreate(), cuda_piProgramCreateWithBinary(), cuda_piProgramGetBuildInfo(), cuda_piProgramGetInfo(), cuda_piProgramLink(), cuda_piProgramRelease(), cuda_piProgramRetain(), cuda_piQueueCreate(), cuda_piQueueFinish(), cuda_piQueueFlush(), cuda_piQueueGetInfo(), cuda_piQueueRelease(), cuda_piQueueRetain(), cuda_piSamplerCreate(), cuda_piSamplerGetInfo(), cuda_piSamplerRelease(), cuda_piSamplerRetain(), cuda_piTearDown(), PI_INVALID_OPERATION, PI_INVALID_VALUE, PI_SUCCESS, piclProgramCreateWithSource(), piContextCreate(), piContextGetInfo(), piContextRelease(), piContextRetain(), piDeviceGetInfo(), piDevicePartition(), piDeviceRelease(), piDeviceRetain(), piDevicesGet(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferCopy(), piEnqueueMemBufferCopyRect(), piEnqueueMemBufferFill(), piEnqueueMemBufferMap(), piEnqueueMemBufferRead(), piEnqueueMemBufferReadRect(), piEnqueueMemBufferWrite(), piEnqueueMemBufferWriteRect(), piEnqueueMemImageCopy(), piEnqueueMemImageFill(), piEnqueueMemImageRead(), piEnqueueMemImageWrite(), piEnqueueMemUnmap(), piEnqueueNativeKernel(), piEventCreate, piEventGetInfo(), piEventGetProfilingInfo(), piEventRelease(), piEventRetain(), piEventSetCallback(), piEventSetStatus(), piEventsWait(), piextContextCreateWithNativeHandle(), piextContextGetNativeHandle(), piextContextSetExtendedDeleter(), piextDeviceCreateWithNativeHandle(), piextDeviceGetNativeHandle(), piextDeviceSelectBinary(), 
piextEventCreateWithNativeHandle(), piextEventGetNativeHandle(), piextGetDeviceFunctionPointer(), piextKernelCreateWithNativeHandle(), piextKernelSetArgMemObj(), piextKernelSetArgPointer(), piextKernelSetArgSampler(), piextMemCreateWithNativeHandle(), piextMemGetNativeHandle(), piextProgramCreateWithNativeHandle(), piextProgramGetNativeHandle(), piextProgramSetSpecializationConstant(), piextQueueCreateWithNativeHandle(), piextQueueGetNativeHandle(), piextUSMDeviceAlloc(), piextUSMEnqueueMemAdvise(), piextUSMEnqueueMemcpy(), piextUSMEnqueueMemset(), piextUSMEnqueuePrefetch(), piextUSMFree(), piextUSMGetMemAllocInfo(), piextUSMHostAlloc(), piextUSMSharedAlloc(), _pi_plugin::PiFunctionTable, piKernelCreate(), piKernelGetGroupInfo(), piKernelGetInfo(), piKernelGetSubGroupInfo(), piKernelRelease(), piKernelRetain(), piKernelSetArg(), piKernelSetExecInfo(), piMemBufferCreate(), piMemBufferPartition(), piMemGetInfo(), piMemImageCreate(), piMemImageGetInfo(), piMemRelease(), piMemRetain(), piPlatformGetInfo(), piPlatformsGet(), piPluginGetLastError(), piProgramBuild(), piProgramCompile(), piProgramCreate(), piProgramCreateWithBinary(), piProgramGetBuildInfo(), piProgramGetInfo(), piProgramLink(), piProgramRelease(), piProgramRetain(), piQueueCreate(), piQueueFinish(), piQueueFlush(), piQueueGetInfo(), piQueueRelease(), piQueueRetain(), piSamplerCreate(), piSamplerGetInfo(), piSamplerRelease(), piSamplerRetain(), piTearDown(), _pi_plugin::PiVersion, _pi_plugin::PluginVersion, and SupportedVersion.

Variable Documentation

◆ SupportedVersion

const char SupportedVersion[] = _PI_H_VERSION_STRING

Definition at line 5045 of file pi_cuda.cpp.

Referenced by piPluginInit().