DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_cuda.cpp File Reference
#include <pi_cuda.hpp>
#include <sycl/detail/cuda_definitions.hpp>
#include <sycl/detail/defines.hpp>
#include <sycl/detail/pi.hpp>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cuda.h>
#include <cuda_device_runtime_api.h>
#include <limits>
#include <memory>
#include <mutex>
#include <regex>
#include <string_view>
Include dependency graph for pi_cuda.cpp:

Go to the source code of this file.

Namespaces

 sycl
 Error handling, matching OpenCL plugin semantics.
 
 sycl::_V1
 
 sycl::_V1::detail
 
 sycl::_V1::detail::pi
 

Macros

#define _PI_CL(pi_api, cuda_api)   (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);
 

Functions

void enableCUDATracing ()
 
void disableCUDATracing ()
 
void sycl::_V1::detail::pi::die (const char *Message)
 
void sycl::_V1::detail::pi::cuPrint (const char *Message)
 
void sycl::_V1::detail::pi::assertion (bool Condition, const char *Message=nullptr)
 
pi_result cuda_piEnqueueEventsWait (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all events. More...
 
pi_result cuda_piEnqueueEventsWaitWithBarrier (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue. More...
 
pi_result cuda_piEventRelease (pi_event event)
 
pi_result cuda_piEventRetain (pi_event event)
 
pi_result enqueueEventWait (pi_queue queue, pi_event event)
 
std::pair< std::string, std::string > splitMetadataName (const std::string &metadataName)
 
std::string getKernelNames (pi_program)
 Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this. More...
 
pi_result cuda_piDeviceGetInfo (pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piPlatformsGet (pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
 Obtains the CUDA platform. More...
 
pi_result cuda_piPlatformGetInfo ([[maybe_unused]] pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piDevicesGet (pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
 
pi_result cuda_piDeviceRetain (pi_device)
 
pi_result cuda_piContextGetInfo (pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piContextRetain (pi_context context)
 
pi_result cuda_piextContextSetExtendedDeleter (pi_context context, pi_context_extended_deleter function, void *user_data)
 
pi_result cuda_piDevicePartition (pi_device, const pi_device_partition_property *, pi_uint32, pi_device *, pi_uint32 *)
 Not applicable to CUDA, devices cannot be partitioned. More...
 
pi_result cuda_piextDeviceSelectBinary (pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary)
 
pi_result cuda_piextGetDeviceFunctionPointer ([[maybe_unused]] pi_device device, pi_program program, const char *func_name, pi_uint64 *func_pointer_ret)
 
pi_result cuda_piDeviceRelease (pi_device)
 
pi_result cuda_piextDeviceGetNativeHandle (pi_device device, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI device object. More...
 
pi_result cuda_piextDeviceCreateWithNativeHandle (pi_native_handle nativeHandle, pi_platform platform, pi_device *piDevice)
 Creates a PI device object from a CUDA device handle. More...
 
pi_result cuda_piContextCreate ([[maybe_unused]] const pi_context_properties *properties, [[maybe_unused]] pi_uint32 num_devices, const pi_device *devices, [[maybe_unused]] void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data), [[maybe_unused]] void *user_data, pi_context *retcontext)
 Create a PI CUDA context. More...
 
pi_result cuda_piContextRelease (pi_context ctxt)
 
pi_result cuda_piextContextGetNativeHandle (pi_context context, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI context object. More...
 
pi_result cuda_piextContextCreateWithNativeHandle (pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
 Creates a PI context object from a CUDA context handle. More...
 
pi_result cuda_piMemBufferCreate (pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, [[maybe_unused]] const pi_mem_properties *properties)
 Creates a PI Memory object using a CUDA memory allocation. More...
 
pi_result cuda_piMemRelease (pi_mem memObj)
 Decreases the reference count of the Mem object. More...
 
pi_result cuda_piMemBufferPartition (pi_mem parent_buffer, pi_mem_flags flags, [[maybe_unused]] pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *memObj)
 Implements a buffer partition in the CUDA backend. More...
 
pi_result cuda_piMemGetInfo (pi_mem, pi_mem_info, size_t, void *, size_t *)
 
pi_result cuda_piextMemGetNativeHandle (pi_mem mem, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI mem object. More...
 
pi_result cuda_piextMemCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_mem *)
 Creates a PI mem object from a CUDA mem handle. More...
 
pi_result cuda_piextMemImageCreateWithNativeHandle (pi_native_handle, pi_context, bool, const pi_image_format *, const pi_image_desc *, pi_mem *)
 Creates a PI image mem object from a CUDA image mem handle. More...
 
pi_result cuda_piQueueCreate (pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
 Creates a pi_queue object on the CUDA backend. More...
 
pi_result cuda_piextQueueCreate (pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue)
 
pi_result cuda_piQueueGetInfo (pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piQueueRetain (pi_queue command_queue)
 
pi_result cuda_piQueueRelease (pi_queue command_queue)
 
pi_result cuda_piQueueFinish (pi_queue command_queue)
 
pi_result cuda_piQueueFlush (pi_queue command_queue)
 
pi_result cuda_piextQueueGetNativeHandle (pi_queue queue, pi_native_handle *nativeHandle, int32_t *NativeHandleDesc)
 Gets the native CUDA handle of a PI queue object. More...
 
pi_result cuda_piextQueueCreateWithNativeHandle (pi_native_handle nativeHandle, int32_t NativeHandleDesc, pi_context context, pi_device device, bool ownNativeHandle, pi_queue_properties *Properties, pi_queue *queue)
 Creates a PI queue object from a CUDA queue handle. More...
 
pi_result cuda_piEnqueueMemBufferWrite (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferRead (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEventsWait (pi_uint32 num_events, const pi_event *event_list)
 
pi_result cuda_piKernelCreate (pi_program program, const char *kernel_name, pi_kernel *kernel)
 
pi_result cuda_piKernelSetArg (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piextKernelSetArgMemObj (pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
 
pi_result cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
 
pi_result cuda_piKernelGetGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEnqueueKernelLaunch (pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueNativeKernel (pi_queue, void(*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *)
 TODO: Not implemented. More...
 
pi_result cuda_piextKernelCreateWithNativeHandle (pi_native_handle, pi_context, pi_program, bool, pi_kernel *)
 
pi_result cuda_piMemImageCreate (pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
 TODO: Not implemented. More...
 
pi_result cuda_piMemImageGetInfo (pi_mem, pi_image_info, size_t, void *, size_t *)
 TODO: Not implemented. More...
 
pi_result cuda_piMemRetain (pi_mem mem)
 
pi_result cuda_piclProgramCreateWithSource (pi_context, pi_uint32, const char **, const size_t *, pi_program *)
 Not used as CUDA backend only creates programs from binary. More...
 
pi_result cuda_piProgramBuild (pi_program program, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data)
 Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels). More...
 
pi_result cuda_piProgramCreate (pi_context, const void *, size_t, pi_program *)
 TODO: Not implemented. More...
 
pi_result cuda_piProgramCreateWithBinary (pi_context context, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *program)
 Loads images from a list of PTX or CUBIN binaries. More...
 
pi_result cuda_piProgramGetInfo (pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramLink (pi_context context, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data, pi_program *ret_program)
 Creates a new PI program object that is the outcome of linking all input programs. More...
 
pi_result cuda_piProgramCompile (pi_program program, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, [[maybe_unused]] pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data)
 Creates a new program that is the outcome of the compilation of the headers and the program. More...
 
pi_result cuda_piProgramGetBuildInfo (pi_program program, pi_device device, pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piProgramRetain (pi_program program)
 
pi_result cuda_piProgramRelease (pi_program program)
 Decreases the reference count of a pi_program object. More...
 
pi_result cuda_piextProgramGetNativeHandle (pi_program program, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI program object. More...
 
pi_result cuda_piextProgramCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_program *)
 Creates a PI program object from a CUDA program handle. More...
 
pi_result cuda_piKernelGetInfo (pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelGetSubGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piKernelRetain (pi_kernel kernel)
 
pi_result cuda_piKernelRelease (pi_kernel kernel)
 
pi_result cuda_piKernelSetExecInfo (pi_kernel, pi_kernel_exec_info, size_t, const void *)
 
pi_result cuda_piextProgramSetSpecializationConstant (pi_program, pi_uint32, size_t, const void *)
 
pi_result cuda_piextKernelSetArgPointer (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
 
pi_result cuda_piEventCreate (pi_context, pi_event *)
 
pi_result cuda_piEventGetInfo (pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 
pi_result cuda_piEventGetProfilingInfo (pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Obtains profiling information from PI CUDA events. TODO: Timings from CUDA are only elapsed time. More...
 
pi_result cuda_piEventSetCallback (pi_event, pi_int32, pfn_notify, void *)
 
pi_result cuda_piEventSetStatus (pi_event, pi_int32)
 
pi_result cuda_piextEventGetNativeHandle (pi_event event, pi_native_handle *nativeHandle)
 Gets the native CUDA handle of a PI event object. More...
 
pi_result cuda_piextEventCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *event)
 Creates a PI event object from a CUDA event handle. More...
 
pi_result cuda_piSamplerCreate (pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
 Creates a PI sampler object. More...
 
pi_result cuda_piSamplerGetInfo (pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 Gets information from a PI sampler object. More...
 
pi_result cuda_piSamplerRetain (pi_sampler sampler)
 Retains a PI sampler object, incrementing its reference count. More...
 
pi_result cuda_piSamplerRelease (pi_sampler sampler)
 Releases a PI sampler object, decrementing its reference count. More...
 
static pi_result commonEnqueueMemBufferCopyRect (CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr, const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch)
 General 3D memory copy operation. More...
 
pi_result cuda_piEnqueueMemBufferReadRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferWriteRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopy (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferCopyRect (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemBufferFill (pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
static size_t imageElementByteSize (CUDA_ARRAY_DESCRIPTOR array_desc)
 
static pi_result commonEnqueueMemImageNDCopy (CUstream cu_stream, pi_mem_type img_type, const size_t *region, const void *src_ptr, const CUmemorytype_enum src_type, const size_t *src_offset, void *dst_ptr, const CUmemorytype_enum dst_type, const size_t *dst_offset)
 General ND memory copy operation for images (where N > 1). More...
 
pi_result cuda_piEnqueueMemImageRead (pi_queue command_queue, pi_mem image, pi_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageWrite (pi_queue command_queue, pi_mem image, pi_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageCopy (pi_queue command_queue, pi_mem src_image, pi_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piEnqueueMemImageFill (pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *)
 TODO: Not implemented in CUDA. More...
 
pi_result cuda_piEnqueueMemBufferMap (pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
 Implements mapping on the host using a BufferRead operation. More...
 
pi_result cuda_piEnqueueMemUnmap (pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 Implements the unmap from the host, using a BufferWrite operation. More...
 
pi_result cuda_piextUSMHostAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment)
 USM: Implements USM Host allocations using CUDA Pinned Memory. More...
 
pi_result cuda_piextUSMDeviceAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_device device, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment)
 USM: Implements USM device allocations using a normal CUDA device pointer. More...
 
pi_result cuda_piextUSMSharedAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_device device, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment)
 USM: Implements USM Shared allocations using CUDA Managed Memory. More...
 
pi_result cuda_piextUSMFree (pi_context context, void *ptr)
 USM: Frees the given USM pointer associated with the context. More...
 
pi_result cuda_piextUSMEnqueueMemset (pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemcpy (pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueuePrefetch (pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piextUSMEnqueueMemAdvise (pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
 USM: memadvise API to govern behavior of automatic migration mechanisms. More...
 
pi_result cuda_piextUSMEnqueueFill2D (pi_queue, void *, size_t, size_t, const void *, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
 
pi_result cuda_piextUSMEnqueueMemset2D (pi_queue, void *, size_t, int, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
 
pi_result cuda_piextUSMEnqueueMemcpy2D (pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 2D Memcpy API More...
 
pi_result cuda_piextUSMGetMemAllocInfo (pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value; PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if the queried pointer fell inside an allocation. More...
 
pi_result cuda_piextEnqueueDeviceGlobalVariableWrite (pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piextEnqueueDeviceGlobalVariableRead (pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
 
pi_result cuda_piextEnqueueReadHostPipe (pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 Host Pipes. More...
 
pi_result cuda_piextEnqueueWriteHostPipe (pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
 
pi_result cuda_piTearDown (void *)
 
pi_result cuda_piGetDeviceAndHostTimer (pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime)
 
pi_result piPluginInit (pi_plugin *PluginInit)
 

Variables

const char SupportedVersion [] = _PI_CUDA_PLUGIN_VERSION_STRING
 

Detailed Description

Implementation of CUDA Plugin.

Definition in file pi_cuda.cpp.

Macro Definition Documentation

◆ _PI_CL

#define _PI_CL (   pi_api,
  cuda_api 
)    (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);

Function Documentation

◆ commonEnqueueMemBufferCopyRect()

static pi_result commonEnqueueMemBufferCopyRect ( CUstream  cu_stream,
pi_buff_rect_region  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
pi_buff_rect_offset  src_offset,
size_t  src_row_pitch,
size_t  src_slice_pitch,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
pi_buff_rect_offset  dst_offset,
size_t  dst_row_pitch,
size_t  dst_slice_pitch 
)
static

General 3D memory copy operation.

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is on the device, src_ptr and/or dst_ptr must be a pointer to a CUdeviceptr.

Definition at line 4417 of file pi_cuda.cpp.

References pi_buff_rect_region_struct::depth_scalar, pi_buff_rect_region_struct::height_scalar, pi_buff_rect_region_struct::width_bytes, pi_buff_rect_offset_struct::x_bytes, pi_buff_rect_offset_struct::y_scalar, and pi_buff_rect_offset_struct::z_scalar.

Referenced by cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferReadRect(), and cuda_piEnqueueMemBufferWriteRect().

◆ commonEnqueueMemImageNDCopy()

static pi_result commonEnqueueMemImageNDCopy ( CUstream  cu_stream,
pi_mem_type  img_type,
const size_t *  region,
const void *  src_ptr,
const CUmemorytype_enum  src_type,
const size_t *  src_offset,
void *  dst_ptr,
const CUmemorytype_enum  dst_type,
const size_t *  dst_offset 
)
static

General ND memory copy operation for images (where N > 1).

This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is an array, src_ptr and/or dst_ptr must be a pointer to a CUarray.

Definition at line 4789 of file pi_cuda.cpp.

References PI_MEM_TYPE_IMAGE2D, and PI_MEM_TYPE_IMAGE3D.

Referenced by cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), and cuda_piEnqueueMemImageWrite().

◆ cuda_piclProgramCreateWithSource()

pi_result cuda_piclProgramCreateWithSource ( pi_context  ,
pi_uint32  ,
const char **  ,
const size_t *  ,
pi_program *  
)

Not used as CUDA backend only creates programs from binary.

See cuda_piProgramCreateWithBinary.

Definition at line 3566 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::cuPrint().

Referenced by piPluginInit().

◆ cuda_piContextCreate()

pi_result cuda_piContextCreate ( [[maybe_unused]] const pi_context_properties *  properties,
[[maybe_unused]] pi_uint32  num_devices,
const pi_device *  devices,
[[maybe_unused]] void(*)(const char *errinfo, const void *private_info, size_t cb, void *user_data)  pfn_notify,
[[maybe_unused]] void *  user_data,
pi_context *  retcontext 
)

Create a PI CUDA context.

By default, creates a scoped context and keeps the last active CUDA context on top of the CUDA context stack. With the __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE, creates a primary CUDA context and activates it on the CUDA context stack.

Parameters
[in] properties: 0-terminated array of key/id-value combinations. Can be nullptr. Only accepts property key/id __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY with a pi_bool value.
[in] num_devices: Number of devices to create the context for.
[in] devices: Devices to create the context for.
[in] pfn_notify: Callback, currently unused.
[in] user_data: User data for callback.
[out] retcontext: Set to created context on success.
Returns
PI_SUCCESS on success, otherwise an error return code.

Definition at line 2273 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piContextGetInfo()

◆ cuda_piContextRelease()

◆ cuda_piContextRetain()

pi_result cuda_piContextRetain ( pi_context  context)

◆ cuda_piDeviceGetInfo()

pi_result cuda_piDeviceGetInfo ( pi_device  device,
pi_device_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Definition at line 1206 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::assertion(), sycl::_V1::detail::pi::cuPrint(), sycl::_V1::detail::pi::die(), getInfo(), getInfoArray(), min(), PI_DEVICE_EXEC_CAPABILITIES_KERNEL, PI_DEVICE_INFO_ADDRESS_BITS, PI_DEVICE_INFO_ATOMIC_64, PI_DEVICE_INFO_AVAILABLE, PI_DEVICE_INFO_BACKEND_VERSION, PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, PI_DEVICE_INFO_BUILT_IN_KERNELS, PI_DEVICE_INFO_COMPILER_AVAILABLE, PI_DEVICE_INFO_DEVICE_ID, PI_DEVICE_INFO_DOUBLE_FP_CONFIG, PI_DEVICE_INFO_DRIVER_VERSION, PI_DEVICE_INFO_ENDIAN_LITTLE, PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, PI_DEVICE_INFO_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT, PI_DEVICE_INFO_EXTENSIONS, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_SIZE, PI_DEVICE_INFO_GPU_EU_COUNT, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, PI_DEVICE_INFO_GPU_SLICES, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, PI_DEVICE_INFO_HALF_FP_CONFIG, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_SRGB, PI_DEVICE_INFO_IMAGE_SUPPORT, PI_DEVICE_INFO_LINKER_AVAILABLE, PI_DEVICE_INFO_LOCAL_MEM_SIZE, PI_DEVICE_INFO_LOCAL_MEM_TYPE, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_MAX_COMPUTE_UNITS, PI_DEVICE_INFO_MAX_CONSTANT_ARGS, PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, PI_DEVICE_INFO_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_SAMPLERS, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, 
PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, PI_DEVICE_INFO_NAME, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_OPENCL_C_VERSION, PI_DEVICE_INFO_PARENT_DEVICE, PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, PI_DEVICE_INFO_PARTITION_PROPERTIES, PI_DEVICE_INFO_PARTITION_TYPE, PI_DEVICE_INFO_PCI_ADDRESS, PI_DEVICE_INFO_PLATFORM, PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, PI_DEVICE_INFO_PROFILE, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES, PI_DEVICE_INFO_REFERENCE_COUNT, PI_DEVICE_INFO_SINGLE_FP_CONFIG, PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PI_DEVICE_INFO_TYPE, PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, PI_DEVICE_INFO_USM_DEVICE_SUPPORT, PI_DEVICE_INFO_USM_HOST_SUPPORT, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, PI_DEVICE_INFO_UUID, PI_DEVICE_INFO_VENDOR, PI_DEVICE_INFO_VENDOR_ID, PI_DEVICE_INFO_VERSION, PI_DEVICE_LOCAL_MEM_TYPE_LOCAL, PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE, PI_DEVICE_TYPE_GPU, PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, 
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES, PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, PI_FALSE, PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, PI_FP_DENORM, PI_FP_FMA, PI_FP_INF_NAN, PI_FP_ROUND_TO_INF, PI_FP_ROUND_TO_NEAREST, PI_FP_ROUND_TO_ZERO, PI_MEMORY_ORDER_ACQ_REL, PI_MEMORY_ORDER_ACQUIRE, PI_MEMORY_ORDER_RELAXED, PI_MEMORY_ORDER_RELEASE, PI_MEMORY_SCOPE_DEVICE, PI_MEMORY_SCOPE_SUB_GROUP, PI_MEMORY_SCOPE_SYSTEM, PI_MEMORY_SCOPE_WORK_GROUP, PI_MEMORY_SCOPE_WORK_ITEM, PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, PI_QUEUE_FLAG_PROFILING_ENABLE, PI_TRUE, PI_USM_ACCESS, PI_USM_ATOMIC_ACCESS, PI_USM_CONCURRENT_ACCESS, and PI_USM_CONCURRENT_ATOMIC_ACCESS.

Referenced by cuda_piPlatformsGet(), and piPluginInit().

◆ cuda_piDevicePartition()

pi_result cuda_piDevicePartition ( pi_device  ,
const pi_device_partition_property ,
pi_uint32  ,
pi_device ,
pi_uint32  
)

Not applicable to CUDA, devices cannot be partitioned.

Definition at line 1143 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piDeviceRelease()

pi_result cuda_piDeviceRelease ( pi_device  )
Returns
PI_SUCCESS always since CUDA devices are always root devices.

Definition at line 1204 of file pi_cuda.cpp.

Referenced by piPluginInit(), _pi_context::~_pi_context(), and _pi_queue::~_pi_queue().

◆ cuda_piDeviceRetain()

pi_result cuda_piDeviceRetain ( pi_device  )
Returns
PI_SUCCESS if the function is executed successfully. CUDA devices are always root devices, so retain always returns success.

Definition at line 1088 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piDevicesGet()

pi_result cuda_piDevicesGet ( pi_platform  platform,
pi_device_type  device_type,
pi_uint32  num_entries,
pi_device devices,
pi_uint32 num_devices 
)
Parameters
devices: List of devices available on the system.
num_devices: Number of elements in the list of devices. Requesting a non-GPU device triggers an error; all PI CUDA devices are GPUs.

Definition at line 1056 of file pi_cuda.cpp.

References PI_DEVICE_TYPE_DEFAULT, and PI_DEVICE_TYPE_GPU.

Referenced by piPluginInit().

◆ cuda_piEnqueueEventsWait()

pi_result cuda_piEnqueueEventsWait ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
pi_event *  event 
)

Enqueues a wait on the given CUstream for all events.

See enqueueEventWait. TODO: Add support for multiple streams once the Event class is properly refactored.

Definition at line 4137 of file pi_cuda.cpp.

References cuda_piEnqueueEventsWaitWithBarrier().

Referenced by cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemUnmap(), and piPluginInit().

◆ cuda_piEnqueueEventsWaitWithBarrier()

pi_result cuda_piEnqueueEventsWaitWithBarrier ( pi_queue  command_queue,
pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
pi_event *  event 
)

Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue.

Parameters
[in] command_queue: A valid PI queue.
[in] num_events_in_wait_list: Number of events in event_wait_list.
[in] event_wait_list: Events to wait on.
[out] event: Event for when all events in event_wait_list have finished or, if event_wait_list is empty, when all previous events in the queue have finished.
Returns
TBD

Definition at line 4157 of file pi_cuda.cpp.

References _pi_queue::barrier_event_, _pi_queue::barrier_mutex_, _pi_queue::barrier_tmp_event_, _pi_queue::compute_applied_barrier_, _pi_queue::get_context(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_MARKER, _pi_queue::sync_streams(), and _pi_queue::transfer_applied_barrier_.

Referenced by cuda_piEnqueueEventsWait(), cuda_piEnqueueKernelLaunch(), and piPluginInit().

◆ cuda_piEnqueueKernelLaunch()

pi_result cuda_piEnqueueKernelLaunch ( pi_queue  command_queue,
pi_kernel  kernel,
pi_uint32  work_dim,
const size_t *  global_work_offset,
const size_t *  global_work_size,
const size_t *  local_work_size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopy()

pi_result cuda_piEnqueueMemBufferCopy ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
size_t  src_offset,
size_t  dst_offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferCopyRect()

pi_result cuda_piEnqueueMemBufferCopyRect ( pi_queue  command_queue,
pi_mem  src_buffer,
pi_mem  dst_buffer,
pi_buff_rect_offset  src_origin,
pi_buff_rect_offset  dst_origin,
pi_buff_rect_region  region,
size_t  src_row_pitch,
size_t  src_slice_pitch,
size_t  dst_row_pitch,
size_t  dst_slice_pitch,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferFill()

pi_result cuda_piEnqueueMemBufferFill ( pi_queue  command_queue,
pi_mem  buffer,
const void *  pattern,
size_t  pattern_size,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferMap()

pi_result cuda_piEnqueueMemBufferMap ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_map,
pi_map_flags  map_flags,
size_t  offset,
size_t  size,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event,
void **  ret_map 
)

Implements mapping on the host using a BufferRead operation.

Mapped pointers are stored in the pi_mem object. If the buffer uses pinned host memory a pointer to that memory is returned and no read operation is done.

Definition at line 5069 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::allocMode_, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_, cuda_piEnqueueEventsWait(), cuda_piEnqueueMemBufferRead(), _pi_queue::get_context(), _pi_mem::mem_::buffer_mem_::get_map_ptr(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_::buffer_mem_::map_to_ptr(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_MEM_BUFFER_MAP, PI_MAP_READ, and PI_MAP_WRITE.

Referenced by piPluginInit().

◆ cuda_piEnqueueMemBufferRead()

pi_result cuda_piEnqueueMemBufferRead ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
size_t  offset,
size_t  size,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferReadRect()

pi_result cuda_piEnqueueMemBufferReadRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_read,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWrite()

pi_result cuda_piEnqueueMemBufferWrite ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
size_t  offset,
size_t  size,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemBufferWriteRect()

pi_result cuda_piEnqueueMemBufferWriteRect ( pi_queue  command_queue,
pi_mem  buffer,
pi_bool  blocking_write,
pi_buff_rect_offset  buffer_offset,
pi_buff_rect_offset  host_offset,
pi_buff_rect_region  region,
size_t  buffer_row_pitch,
size_t  buffer_slice_pitch,
size_t  host_row_pitch,
size_t  host_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageCopy()

pi_result cuda_piEnqueueMemImageCopy ( pi_queue  command_queue,
pi_mem  src_image,
pi_mem  dst_image,
const size_t *  src_origin,
const size_t *  dst_origin,
const size_t *  region,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageFill()

pi_result cuda_piEnqueueMemImageFill ( pi_queue  ,
pi_mem  ,
const void *  ,
const size_t *  ,
const size_t *  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented in CUDA.

Definition at line 5057 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEnqueueMemImageRead()

pi_result cuda_piEnqueueMemImageRead ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_read,
const size_t *  origin,
const size_t *  region,
size_t  row_pitch,
size_t  slice_pitch,
void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemImageWrite()

pi_result cuda_piEnqueueMemImageWrite ( pi_queue  command_queue,
pi_mem  image,
pi_bool  blocking_write,
const size_t *  origin,
const size_t *  region,
size_t  input_row_pitch,
size_t  input_slice_pitch,
const void *  ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueMemUnmap()

pi_result cuda_piEnqueueMemUnmap ( pi_queue  command_queue,
pi_mem  memobj,
void *  mapped_ptr,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

◆ cuda_piEnqueueNativeKernel()

pi_result cuda_piEnqueueNativeKernel ( pi_queue  ,
void(*)(void *)  ,
void *  ,
size_t  ,
pi_uint32  ,
const pi_mem ,
const void **  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

TODO: Not implemented.

Definition at line 3379 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventCreate()

pi_result cuda_piEventCreate ( pi_context  ,
pi_event  
)

Definition at line 4017 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventGetInfo()

pi_result cuda_piEventGetInfo ( pi_event  event,
pi_event_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piEventGetProfilingInfo()

pi_result cuda_piEventGetProfilingInfo ( pi_event  event,
pi_profiling_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piEventRelease()

pi_result cuda_piEventRelease ( pi_event  event)

Definition at line 4107 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion().

Referenced by piPluginInit().

◆ cuda_piEventRetain()

pi_result cuda_piEventRetain ( pi_event  event)

Definition at line 4095 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion().

Referenced by piPluginInit().

◆ cuda_piEventSetCallback()

pi_result cuda_piEventSetCallback ( pi_event  ,
pi_int32  ,
pfn_notify  ,
void *   
)

Definition at line 4085 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventSetStatus()

pi_result cuda_piEventSetStatus ( pi_event  ,
pi_int32   
)

Definition at line 4090 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piEventsWait()

pi_result cuda_piEventsWait ( pi_uint32  num_events,
const pi_event event_list 
)

Definition at line 2953 of file pi_cuda.cpp.

References _pi_event::get_context().

Referenced by piPluginInit().

◆ cuda_piextContextCreateWithNativeHandle()

pi_result cuda_piextContextCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_uint32  num_devices,
const pi_device devices,
bool  ownNativeHandle,
pi_context piContext 
)

Creates a PI context object from a CUDA context handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI context object from.
[out] context: Set to the PI context object created from native handle.
Returns
TBD

Definition at line 2334 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextContextGetNativeHandle()

pi_result cuda_piextContextGetNativeHandle ( pi_context  context,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI context object.

Parameters
[in] context: The PI context to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI context object.
Returns
PI_SUCCESS

Definition at line 2321 of file pi_cuda.cpp.

References _pi_context::get().

Referenced by piPluginInit().

◆ cuda_piextContextSetExtendedDeleter()

pi_result cuda_piextContextSetExtendedDeleter ( pi_context  context,
pi_context_extended_deleter  function,
void *  user_data 
)

Definition at line 1136 of file pi_cuda.cpp.

References _pi_context::set_extended_deleter().

Referenced by piPluginInit().

◆ cuda_piextDeviceCreateWithNativeHandle()

pi_result cuda_piextDeviceCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_platform  platform,
pi_device piDevice 
)

Creates a PI device object from a CUDA device handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI device object from.
[in] platform: is the PI platform of the device.
[out] device: Set to the PI device object created from native handle.
Returns
TBD

Definition at line 2205 of file pi_cuda.cpp.

References cuda_piPlatformsGet(), _pi_device::get(), and sycl::_V1::malloc().

Referenced by piPluginInit().

◆ cuda_piextDeviceGetNativeHandle()

pi_result cuda_piextDeviceGetNativeHandle ( pi_device  device,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI device object.

Parameters
[in] device: The PI device to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI device object.
Returns
PI_SUCCESS

Definition at line 2191 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextDeviceSelectBinary()

pi_result cuda_piextDeviceSelectBinary ( pi_device  device,
pi_device_binary binaries,
pi_uint32  num_binaries,
pi_uint32 selected_binary 
)
Returns
If available, the first binary that is PTX

Definition at line 1151 of file pi_cuda.cpp.

References __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64, and sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextEnqueueDeviceGlobalVariableRead()

pi_result cuda_piextEnqueueDeviceGlobalVariableRead ( pi_queue  queue,
pi_program  program,
const char *  name,
pi_bool  blocking_read,
size_t  count,
size_t  offset,
void *  dst,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

Definition at line 5756 of file pi_cuda.cpp.

References cuda_piextUSMEnqueueMemcpy(), _pi_program::get(), and _pi_program::globalIDMD_.

Referenced by piPluginInit().

◆ cuda_piextEnqueueDeviceGlobalVariableWrite()

pi_result cuda_piextEnqueueDeviceGlobalVariableWrite ( pi_queue  queue,
pi_program  program,
const char *  name,
pi_bool  blocking_write,
size_t  count,
size_t  offset,
const void *  src,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

Definition at line 5718 of file pi_cuda.cpp.

References cuda_piextUSMEnqueueMemcpy(), _pi_program::get(), and _pi_program::globalIDMD_.

Referenced by piPluginInit().

◆ cuda_piextEnqueueReadHostPipe()

pi_result cuda_piextEnqueueReadHostPipe ( pi_queue  queue,
pi_program  program,
const char *  pipe_symbol,
pi_bool  blocking,
void *  ptr,
size_t  size,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

Host Pipes.

Definition at line 5795 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextEnqueueWriteHostPipe()

pi_result cuda_piextEnqueueWriteHostPipe ( pi_queue  queue,
pi_program  program,
const char *  pipe_symbol,
pi_bool  blocking,
void *  ptr,
size_t  size,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

Definition at line 5813 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextEventCreateWithNativeHandle()

pi_result cuda_piextEventCreateWithNativeHandle ( pi_native_handle  nativeHandle,
pi_context  context,
bool  ownNativeHandle,
pi_event event 
)

Creates a PI event object from a CUDA event handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI event object from.
[out] event: Set to the PI event object created from native handle.
Returns
TBD

Definition at line 4259 of file pi_cuda.cpp.

References _pi_event::make_with_native().

Referenced by piPluginInit().

◆ cuda_piextEventGetNativeHandle()

pi_result cuda_piextEventGetNativeHandle ( pi_event  event,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI event object.

Parameters
[in] event: The PI event to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI event object.
Returns
PI_SUCCESS on success. PI_ERROR_INVALID_EVENT if given a user event.

Definition at line 4245 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextGetDeviceFunctionPointer()

pi_result cuda_piextGetDeviceFunctionPointer ( [[maybe_unused]] pi_device  device,
pi_program  program,
const char *  func_name,
pi_uint64 func_pointer_ret 
)

Definition at line 1179 of file pi_cuda.cpp.

References _pi_program::get(), _pi_program::get_context(), and _pi_context::get_device().

Referenced by piPluginInit().

◆ cuda_piextKernelCreateWithNativeHandle()

pi_result cuda_piextKernelCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
pi_program  ,
bool  ,
pi_kernel  
)

Definition at line 3386 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgMemObj()

◆ cuda_piextKernelSetArgPointer()

pi_result cuda_piextKernelSetArgPointer ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 4007 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextKernelSetArgSampler()

pi_result cuda_piextKernelSetArgSampler ( pi_kernel  kernel,
pi_uint32  arg_index,
const pi_sampler arg_value 
)

Definition at line 3086 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextMemCreateWithNativeHandle()

pi_result cuda_piextMemCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
pi_mem  
)

Creates a PI mem object from a CUDA mem handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI mem object from.
[in] context: The PI context of the memory allocation.
[in] ownNativeHandle: Indicates if we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT.
[out] mem: Set to the PI mem object created from native handle.
Returns
TBD

Definition at line 2582 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextMemGetNativeHandle()

pi_result cuda_piextMemGetNativeHandle ( pi_mem  mem,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI mem object.

Parameters
[in] mem: The PI mem to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI mem object.
Returns
PI_SUCCESS

Definition at line 2565 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), and _pi_mem::mem_.

Referenced by piPluginInit().

◆ cuda_piextMemImageCreateWithNativeHandle()

pi_result cuda_piextMemImageCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
const pi_image_format ,
const pi_image_desc ,
pi_mem  
)

Creates a PI image mem object from a CUDA image mem handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] pi_native_handle: The native handle to create PI mem object from.
[in] pi_context: The PI context of the memory allocation.
[in] ownNativeHandle: Boolean indicating whether we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT.
[in] pi_image_format: The format of the image.
[in] pi_image_desc: The description information for the image.
[out] pi_mem: Set to the PI mem object created from native handle.
Returns
TBD

Definition at line 2602 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

◆ cuda_piextProgramCreateWithNativeHandle()

pi_result cuda_piextProgramCreateWithNativeHandle ( pi_native_handle  ,
pi_context  ,
bool  ,
pi_program  
)

Creates a PI program object from a CUDA program handle.

TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI program object from.
[in] context: The PI context of the program.
[out] program: Set to the PI program object created from native handle.
Returns
TBD

Definition at line 3871 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextProgramGetNativeHandle()

pi_result cuda_piextProgramGetNativeHandle ( pi_program  program,
pi_native_handle nativeHandle 
)

Gets the native CUDA handle of a PI program object.

Parameters
[in] program: The PI program to get the native CUDA object of.
[out] nativeHandle: Set to the native handle of the PI program object.
Returns
TBD

Definition at line 3856 of file pi_cuda.cpp.

References _pi_program::get().

Referenced by piPluginInit().

◆ cuda_piextProgramSetSpecializationConstant()

pi_result cuda_piextProgramSetSpecializationConstant ( pi_program  ,
pi_uint32  ,
size_t  ,
const void *   
)

Definition at line 3999 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextQueueCreate()

pi_result cuda_piextQueueCreate ( pi_context  Context,
pi_device  Device,
pi_queue_properties Properties,
pi_queue Queue 
)

Definition at line 2661 of file pi_cuda.cpp.

References cuda_piQueueCreate(), and PI_QUEUE_FLAGS.

Referenced by piPluginInit().

◆ cuda_piextQueueCreateWithNativeHandle()

pi_result cuda_piextQueueCreateWithNativeHandle ( pi_native_handle  nativeHandle,
int32_t  NativeHandleDesc,
pi_context  context,
pi_device  device,
bool  ownNativeHandle,
pi_queue_properties Properties,
pi_queue queue 
)

Creates a PI queue object from a CUDA queue handle.

NOTE: The created PI object does not take ownership of the native handle.

Parameters
[in] nativeHandle: The native handle to create PI queue object from.
[in] nativeHandleDesc: Info about the native handle.
[in] context: is the PI context of the queue.
[out] queue: Set to the PI queue object created from native handle.
ownNativeHandle: tells if SYCL RT should assume the ownership of the native handle, if it can.
Returns
TBD

Definition at line 2820 of file pi_cuda.cpp.

References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, sycl::_V1::detail::pi::die(), and _pi_context::get_device().

Referenced by piPluginInit().

◆ cuda_piextQueueGetNativeHandle()

pi_result cuda_piextQueueGetNativeHandle ( pi_queue  queue,
pi_native_handle nativeHandle,
int32_t *  NativeHandleDesc 
)

Gets the native CUDA handle of a PI queue object.

Parameters
[in] queue: The PI queue to get the native CUDA object of.
[in] NativeHandleDesc: Pointer to additional native handle info.
[out] nativeHandle: Set to the native handle of the PI queue object.
Returns
PI_SUCCESS

Definition at line 2799 of file pi_cuda.cpp.

References _pi_queue::get_context(), and _pi_queue::get_next_compute_stream().

Referenced by piPluginInit().

◆ cuda_piextUSMDeviceAlloc()

pi_result cuda_piextUSMDeviceAlloc ( void **  result_ptr,
pi_context  context,
[[maybe_unused]] pi_device  device,
[[maybe_unused]] pi_usm_mem_properties properties,
size_t  size,
[[maybe_unused]] pi_uint32  alignment 
)

USM: Implements USM device allocations using a normal CUDA device pointer.

Definition at line 5208 of file pi_cuda.cpp.

References sycl::_V1::ext::oneapi::experimental::alignment.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueFill2D()

pi_result cuda_piextUSMEnqueueFill2D ( pi_queue  ,
void *  ,
size_t  ,
size_t  ,
const void *  ,
size_t  ,
size_t  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

Definition at line 5522 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueMemAdvise()

◆ cuda_piextUSMEnqueueMemcpy()

pi_result cuda_piextUSMEnqueueMemcpy ( pi_queue  queue,
pi_bool  blocking,
void *  dst_ptr,
const void *  src_ptr,
size_t  size,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMEnqueueMemcpy2D()

pi_result cuda_piextUSMEnqueueMemcpy2D ( pi_queue  queue,
pi_bool  blocking,
void *  dst_ptr,
size_t  dst_pitch,
const void *  src_ptr,
size_t  src_pitch,
size_t  width,
size_t  height,
pi_uint32  num_events_in_wait_list,
const pi_event event_wait_list,
pi_event event 
)

2D Memcpy API

Parameters
queue: the queue to submit to
blocking: whether this operation should block the host
dst_ptr: the location the data will be copied to
dst_pitch: the total width of the destination memory including padding
src_ptr: the data to be copied
src_pitch: the total width of the source memory including padding
width: the width in bytes of each row to be copied
height: the height of the columns to be copied
num_events_in_wait_list: the number of events to wait on
event_wait_list: an array of events to wait on
event: the event that represents this operation

Definition at line 5552 of file pi_cuda.cpp.

References _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), and PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT.

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueueMemset()

pi_result cuda_piextUSMEnqueueMemset ( pi_queue  queue,
void *  ptr,
pi_int32  value,
size_t  count,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMEnqueueMemset2D()

pi_result cuda_piextUSMEnqueueMemset2D ( pi_queue  ,
void *  ,
size_t  ,
int  ,
size_t  ,
size_t  ,
pi_uint32  ,
const pi_event ,
pi_event  
)

Definition at line 5531 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piextUSMEnqueuePrefetch()

pi_result cuda_piextUSMEnqueuePrefetch ( pi_queue  queue,
const void *  ptr,
size_t  size,
pi_usm_migration_flags  flags,
pi_uint32  num_events_in_waitlist,
const pi_event events_waitlist,
pi_event event 
)

◆ cuda_piextUSMFree()

pi_result cuda_piextUSMFree ( pi_context  context,
void *  ptr 
)

USM: Frees the given USM pointer associated with the context.

Definition at line 5258 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piextUSMGetMemAllocInfo()

pi_result cuda_piextUSMGetMemAllocInfo ( pi_context  context,
const void *  ptr,
pi_mem_alloc_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

API to query information about USM allocated pointers. Valid queries:

- PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value.
- PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer falls inside an allocation. Result must fit in void *.
- PI_MEM_ALLOC_SIZE returns how big the queried pointer's allocation is in bytes. Result is a size_t.
- PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against.

Parameters
context: the pi_context
ptr: the pointer to query
param_name: the type of query to perform
param_value_size: the size of the result in bytes
param_value: the result
param_value_size_ret: how many bytes were written

Definition at line 5620 of file pi_cuda.cpp.

References cuda_piPlatformsGet(), getInfo(), PI_MEM_ALLOC_BASE_PTR, PI_MEM_ALLOC_DEVICE, PI_MEM_ALLOC_SIZE, PI_MEM_ALLOC_TYPE, PI_MEM_TYPE_DEVICE, PI_MEM_TYPE_HOST, PI_MEM_TYPE_SHARED, and PI_MEM_TYPE_UNKNOWN.

Referenced by piPluginInit().

◆ cuda_piextUSMHostAlloc()

pi_result cuda_piextUSMHostAlloc ( void **  result_ptr,
pi_context  context,
[[maybe_unused]] pi_usm_mem_properties properties,
size_t  size,
[[maybe_unused]] pi_uint32  alignment 
)

USM: Implements USM Host allocations using CUDA Pinned Memory.

Definition at line 5185 of file pi_cuda.cpp.

References sycl::_V1::ext::oneapi::experimental::alignment.

Referenced by piPluginInit().

◆ cuda_piextUSMSharedAlloc()

pi_result cuda_piextUSMSharedAlloc ( void **  result_ptr,
pi_context  context,
[[maybe_unused]] pi_device  device,
[[maybe_unused]] pi_usm_mem_properties properties,
size_t  size,
[[maybe_unused]] pi_uint32  alignment 
)

USM: Implements USM Shared allocations using CUDA Managed Memory.

Definition at line 5233 of file pi_cuda.cpp.

References sycl::_V1::ext::oneapi::experimental::alignment.

Referenced by piPluginInit().

◆ cuda_piGetDeviceAndHostTimer()

pi_result cuda_piGetDeviceAndHostTimer ( pi_device  Device,
uint64_t *  DeviceTime,
uint64_t *  HostTime 
)

Definition at line 5843 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelCreate()

pi_result cuda_piKernelCreate ( pi_program  program,
const char *  kernel_name,
pi_kernel kernel 
)

Definition at line 2988 of file pi_cuda.cpp.

References _pi_program::get(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piKernelGetGroupInfo()

◆ cuda_piKernelGetInfo()

pi_result cuda_piKernelGetInfo ( pi_kernel  kernel,
pi_kernel_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelGetSubGroupInfo()

pi_result cuda_piKernelGetSubGroupInfo ( pi_kernel  kernel,
pi_device  device,
pi_kernel_sub_group_info  param_name,
size_t  input_value_size,
const void *  input_value,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piKernelRelease()

pi_result cuda_piKernelRelease ( pi_kernel  kernel)

Definition at line 3975 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelRetain()

pi_result cuda_piKernelRetain ( pi_kernel  kernel)

Definition at line 3967 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelSetArg()

pi_result cuda_piKernelSetArg ( pi_kernel  kernel,
pi_uint32  arg_index,
size_t  arg_size,
const void *  arg_value 
)

Definition at line 3028 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piKernelSetExecInfo()

pi_result cuda_piKernelSetExecInfo ( pi_kernel  ,
pi_kernel_exec_info  ,
size_t  ,
const void *   
)

Definition at line 3994 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piMemBufferCreate()

pi_result cuda_piMemBufferCreate ( pi_context  context,
pi_mem_flags  flags,
size_t  size,
void *  host_ptr,
pi_mem ret_mem,
[[maybe_unused]] const pi_mem_properties properties 
)

Creates a PI Memory object using a CUDA memory allocation.

Can trigger a manual copy depending on the mode. TODO: Implement USE_HOST_PTR using cuHostRegister.

Definition at line 2355 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, PI_MEM_FLAGS_HOST_PTR_ALLOC, PI_MEM_FLAGS_HOST_PTR_COPY, PI_MEM_FLAGS_HOST_PTR_USE, and _pi_mem::mem_::buffer_mem_::use_host_ptr.

Referenced by piPluginInit().

◆ cuda_piMemBufferPartition()

pi_result cuda_piMemBufferPartition ( pi_mem  parent_buffer,
pi_mem_flags  flags,
[[maybe_unused]] pi_buffer_create_type  buffer_create_type,
void *  buffer_create_info,
pi_mem memObj 
)

Implements a buffer partition in the CUDA backend.

A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented as an offset over an existing CUDA allocation.

Definition at line 2495 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::context_, _pi_mem::mem_::buffer_mem_::get_size(), _pi_mem::mem_::buffer_mem_::hostPtr_, _pi_mem::is_buffer(), _pi_mem::is_sub_buffer(), _pi_mem::mem_, PI_BUFFER_CREATE_TYPE_REGION, PI_MEM_FLAGS_ACCESS_RW, and _pi_mem::mem_::buffer_mem_::ptr_.

Referenced by piPluginInit().

◆ cuda_piMemGetInfo()

pi_result cuda_piMemGetInfo ( pi_mem  ,
pi_mem_info  ,
size_t  ,
void *  ,
size_t *   
)

Definition at line 2555 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemImageCreate()

◆ cuda_piMemImageGetInfo()

pi_result cuda_piMemImageGetInfo ( pi_mem  ,
pi_image_info  ,
size_t  ,
void *  ,
size_t *   
)

TODO: Not implemented.

Definition at line 3550 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piMemRelease()

pi_result cuda_piMemRelease ( pi_mem  memObj)

Decreases the reference count of the Mem object.

If this is zero, calls the relevant CUDA Free function.

Returns
PI_SUCCESS unless deallocation error

Definition at line 2431 of file pi_cuda.cpp.

References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, _pi_mem::decrement_reference_count(), sycl::_V1::detail::pi::die(), _pi_mem::is_sub_buffer(), _pi_mem::mem_type_, _pi_mem::surface, and _pi_mem::mem_::buffer_mem_::use_host_ptr.

Referenced by piPluginInit(), and _pi_mem::~_pi_mem().

◆ cuda_piMemRetain()

pi_result cuda_piMemRetain ( pi_mem  mem)

Definition at line 3556 of file pi_cuda.cpp.

References _pi_mem::get_reference_count(), and _pi_mem::increment_reference_count().

Referenced by piPluginInit().

◆ cuda_piPlatformGetInfo()

pi_result cuda_piPlatformGetInfo ( [[maybe_unused]] pi_platform  platform,
pi_platform_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piPlatformsGet()

pi_result cuda_piPlatformsGet ( pi_uint32  num_entries,
pi_platform platforms,
pi_uint32 num_platforms 
)

Obtains the CUDA platform.

There is only one CUDA platform, and it contains all devices on the system. The first call triggers the CUDA Driver initialization (cuInit), so this must be the first PI API called.

However, because multiple devices in a context are not currently supported, each device is placed in a separate platform.

Definition at line 908 of file pi_cuda.cpp.

References cuda_piDeviceGetInfo(), PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, and PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES.

Referenced by cuda_piextDeviceCreateWithNativeHandle(), cuda_piextUSMGetMemAllocInfo(), and piPluginInit().

◆ cuda_piProgramBuild()

pi_result cuda_piProgramBuild ( pi_program  program,
[[maybe_unused] ] pi_uint32  num_devices,
[[maybe_unused] ] const pi_device device_list,
const char *  options,
[[maybe_unused] ] void(*)(pi_program program, void *user_data)  pfn_notify,
[[maybe_unused] ] void *  user_data 
)

Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels).

See _pi_program for implementation details.

Definition at line 3576 of file pi_cuda.cpp.

References _pi_program::build_program(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piProgramCompile()

pi_result cuda_piProgramCompile ( pi_program  program,
[[maybe_unused] ] pi_uint32  num_devices,
[[maybe_unused] ] const pi_device device_list,
const char *  options,
[[maybe_unused] ] pi_uint32  num_input_headers,
const pi_program input_headers,
const char **  header_include_names,
[[maybe_unused] ] void(*)(pi_program program, void *user_data)  pfn_notify,
[[maybe_unused] ] void *  user_data 
)

Creates a new program that is the outcome of the compilation of the headers and the program.

TODO: Implement asynchronous compilation.

Definition at line 3754 of file pi_cuda.cpp.

References _pi_program::build_program(), and _pi_program::get_context().

Referenced by piPluginInit().

◆ cuda_piProgramCreate()

pi_result cuda_piProgramCreate ( pi_context  ,
const void *  ,
size_t  ,
pi_program  
)

TODO: Not implemented.

Definition at line 3601 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by piPluginInit().

◆ cuda_piProgramCreateWithBinary()

pi_result cuda_piProgramCreateWithBinary ( pi_context  context,
[[maybe_unused] ] pi_uint32  num_devices,
[[maybe_unused] ] const pi_device device_list,
const size_t *  lengths,
const unsigned char **  binaries,
size_t  num_metadata_entries,
const pi_device_binary_property metadata,
pi_int32 binary_status,
pi_program program 
)

Loads images from a list of PTX or CUBIN binaries.

Note: This function makes no calls to the CUDA driver API; it only stores the binaries for later use.

Note: Only one device is supported.

Definition at line 3612 of file pi_cuda.cpp.

References _pi_device::get(), and _pi_context::get_device().

Referenced by piPluginInit().

◆ cuda_piProgramGetBuildInfo()

pi_result cuda_piProgramGetBuildInfo ( pi_program  program,
pi_device  device,
pi_program_build_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

◆ cuda_piProgramGetInfo()

◆ cuda_piProgramLink()

pi_result cuda_piProgramLink ( pi_context  context,
[[maybe_unused] ] pi_uint32  num_devices,
[[maybe_unused] ] const pi_device device_list,
const char *  options,
pi_uint32  num_input_programs,
const pi_program input_programs,
[[maybe_unused] ] void(*)(pi_program program, void *user_data)  pfn_notify,
[[maybe_unused] ] void *  user_data,
pi_program ret_program 
)

Creates a new PI program object that is the outcome of linking all input programs.

TODO: Implement linker options; this requires a mapping of OpenCL options to CUDA.

Definition at line 3691 of file pi_cuda.cpp.

References _pi_program::binary_, and _pi_program::binarySizeInBytes_.

Referenced by piPluginInit().

◆ cuda_piProgramRelease()

pi_result cuda_piProgramRelease ( pi_program  program)

Decreases the reference count of a pi_program object.

When the reference count reaches 0, it unloads the module from the context.

Definition at line 3821 of file pi_cuda.cpp.

References _pi_program::decrement_reference_count(), _pi_program::get(), _pi_program::get_context(), and _pi_program::get_reference_count().

Referenced by piPluginInit(), and _pi_kernel::~_pi_kernel().

◆ cuda_piProgramRetain()

pi_result cuda_piProgramRetain ( pi_program  program)

◆ cuda_piQueueCreate()

pi_result cuda_piQueueCreate ( pi_context  context,
pi_device  device,
pi_queue_properties  properties,
pi_queue queue 
)

Creates a pi_queue object on the CUDA backend.

Valid properties

  • __SYCL_PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT
  • __SYCL_PI_CUDA_SYNC_WITH_DEFAULT -> CU_STREAM_NON_BLOCKING
    Returns
    PI queue object mapping to a CUstream.

Definition at line 2618 of file pi_cuda.cpp.

References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, _pi_queue::default_num_compute_streams, _pi_queue::default_num_transfer_streams, _pi_context::get_device(), and PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE.

Referenced by cuda_piextQueueCreate(), and piPluginInit().

◆ cuda_piQueueFinish()

pi_result cuda_piQueueFinish ( pi_queue  command_queue)

Definition at line 2759 of file pi_cuda.cpp.

References _pi_queue::get_context(), and _pi_queue::sync_streams().

Referenced by piPluginInit().

◆ cuda_piQueueFlush()

pi_result cuda_piQueueFlush ( pi_queue  command_queue)

Definition at line 2787 of file pi_cuda.cpp.

Referenced by piPluginInit().

◆ cuda_piQueueGetInfo()

◆ cuda_piQueueRelease()

◆ cuda_piQueueRetain()

pi_result cuda_piQueueRetain ( pi_queue  command_queue)

Definition at line 2723 of file pi_cuda.cpp.

References _pi_queue::get_reference_count(), and _pi_queue::increment_reference_count().

Referenced by piPluginInit().

◆ cuda_piSamplerCreate()

pi_result cuda_piSamplerCreate ( pi_context  context,
const pi_sampler_properties sampler_properties,
pi_sampler result_sampler 
)

Creates a PI sampler object.

Parameters
[in] context: The context the sampler is created for.
[in] sampler_properties: The properties for the sampler.
[out] result_sampler: Set to the resulting sampler object.
Returns
PI_SUCCESS on success. PI_ERROR_INVALID_VALUE if given an invalid property or if there are multiple properties from the same category.

Definition at line 4283 of file pi_cuda.cpp.

References PI_SAMPLER_ADDRESSING_MODE_CLAMP, PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, PI_SAMPLER_PROPERTIES_FILTER_MODE, PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, and PI_TRUE.

Referenced by piPluginInit().

◆ cuda_piSamplerGetInfo()

pi_result cuda_piSamplerGetInfo ( pi_sampler  sampler,
pi_sampler_info  param_name,
size_t  param_value_size,
void *  param_value,
size_t *  param_value_size_ret 
)

Gets information from a PI sampler object.

Parameters
[in] sampler: The sampler to get the information from.
[in] param_name: The name of the information to get.
[in] param_value_size: The size of the param_value.
[out] param_value: Set to information value.
[out] param_value_size_ret: Set to the size of the information value.
Returns
PI_SUCCESS on success.

Definition at line 4342 of file pi_cuda.cpp.

References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_sampler::context_, _pi_sampler::get_reference_count(), getInfo(), PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_INFO_ADDRESSING_MODE, PI_SAMPLER_INFO_CONTEXT, PI_SAMPLER_INFO_FILTER_MODE, PI_SAMPLER_INFO_NORMALIZED_COORDS, PI_SAMPLER_INFO_REFERENCE_COUNT, and _pi_sampler::props_.

Referenced by piPluginInit().

◆ cuda_piSamplerRelease()

pi_result cuda_piSamplerRelease ( pi_sampler  sampler)

Releases a PI sampler object, decrementing its reference count.

If the reference count reaches zero, the sampler object is destroyed.

Parameters
[in] sampler: The sampler to decrement the reference count of.
Returns
PI_SUCCESS.

Definition at line 4395 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::assertion(), _pi_sampler::decrement_reference_count(), and _pi_sampler::get_reference_count().

Referenced by piPluginInit().

◆ cuda_piSamplerRetain()

pi_result cuda_piSamplerRetain ( pi_sampler  sampler)

Retains a PI sampler object, incrementing its reference count.

Parameters
[in] sampler: The sampler to increment the reference count of.
Returns
PI_SUCCESS.

Definition at line 4383 of file pi_cuda.cpp.

References _pi_sampler::increment_reference_count().

Referenced by piPluginInit().

◆ cuda_piTearDown()

pi_result cuda_piTearDown ( void *  )

Definition at line 5838 of file pi_cuda.cpp.

References disableCUDATracing().

Referenced by piPluginInit().

◆ disableCUDATracing()

void disableCUDATracing ( )

Definition at line 103 of file tracing.cpp.

References CUDA_CALL_STREAM_NAME, and CUDA_DEBUG_STREAM_NAME.

Referenced by cuda_piTearDown().

◆ enableCUDATracing()

void enableCUDATracing ( )

Definition at line 72 of file tracing.cpp.

References CUDA_CALL_STREAM_NAME, CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, and GVerStr.

Referenced by piPluginInit().

◆ enqueueEventWait()

pi_result enqueueEventWait ( pi_queue  queue,
pi_event  event 
)

Definition at line 762 of file pi_cuda.cpp.

References _pi_queue::for_each_stream().

◆ getKernelNames()

std::string getKernelNames ( pi_program  )

Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this.

Note: This is currently only being used by the SYCL program class for the has_kernel method, so an alternative would be to move the has_kernel query to PI and use cuModuleGetFunction to check for a kernel. Note: Another alternative is to add kernel names as metadata, like with reqd_work_group_size.

Definition at line 888 of file pi_cuda.cpp.

References sycl::_V1::detail::pi::die().

Referenced by cuda_piProgramGetInfo().

◆ imageElementByteSize()

static size_t imageElementByteSize ( CUDA_ARRAY_DESCRIPTOR  array_desc)
static

◆ piPluginInit()

pi_result piPluginInit ( pi_plugin PluginInit)

Definition at line 5870 of file pi_cuda.cpp.

References _PI_CL, _PI_PLUGIN_VERSION_CHECK, cuda_piclProgramCreateWithSource(), cuda_piContextCreate(), cuda_piContextGetInfo(), cuda_piContextRelease(), cuda_piContextRetain(), cuda_piDeviceGetInfo(), cuda_piDevicePartition(), cuda_piDeviceRelease(), cuda_piDeviceRetain(), cuda_piDevicesGet(), cuda_piEnqueueEventsWait(), cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageFill(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piEnqueueNativeKernel(), cuda_piEventCreate(), cuda_piEventGetInfo(), cuda_piEventGetProfilingInfo(), cuda_piEventRelease(), cuda_piEventRetain(), cuda_piEventSetCallback(), cuda_piEventSetStatus(), cuda_piEventsWait(), cuda_piextContextCreateWithNativeHandle(), cuda_piextContextGetNativeHandle(), cuda_piextContextSetExtendedDeleter(), cuda_piextDeviceCreateWithNativeHandle(), cuda_piextDeviceGetNativeHandle(), cuda_piextDeviceSelectBinary(), cuda_piextEnqueueDeviceGlobalVariableRead(), cuda_piextEnqueueDeviceGlobalVariableWrite(), cuda_piextEnqueueReadHostPipe(), cuda_piextEnqueueWriteHostPipe(), cuda_piextEventCreateWithNativeHandle(), cuda_piextEventGetNativeHandle(), cuda_piextGetDeviceFunctionPointer(), cuda_piextKernelCreateWithNativeHandle(), cuda_piextKernelSetArgMemObj(), cuda_piextKernelSetArgPointer(), cuda_piextKernelSetArgSampler(), cuda_piextMemCreateWithNativeHandle(), cuda_piextMemGetNativeHandle(), cuda_piextProgramCreateWithNativeHandle(), cuda_piextProgramGetNativeHandle(), cuda_piextProgramSetSpecializationConstant(), cuda_piextQueueCreate(), cuda_piextQueueCreateWithNativeHandle(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMDeviceAlloc(), 
cuda_piextUSMEnqueueFill2D(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueueMemset(), cuda_piextUSMEnqueueMemset2D(), cuda_piextUSMEnqueuePrefetch(), cuda_piextUSMFree(), cuda_piextUSMGetMemAllocInfo(), cuda_piextUSMHostAlloc(), cuda_piextUSMSharedAlloc(), cuda_piGetDeviceAndHostTimer(), cuda_piKernelCreate(), cuda_piKernelGetGroupInfo(), cuda_piKernelGetInfo(), cuda_piKernelGetSubGroupInfo(), cuda_piKernelRelease(), cuda_piKernelRetain(), cuda_piKernelSetArg(), cuda_piKernelSetExecInfo(), cuda_piMemBufferCreate(), cuda_piMemBufferPartition(), cuda_piMemGetInfo(), cuda_piMemImageCreate(), cuda_piMemImageGetInfo(), cuda_piMemRelease(), cuda_piMemRetain(), cuda_piPlatformGetInfo(), cuda_piPlatformsGet(), cuda_piProgramBuild(), cuda_piProgramCompile(), cuda_piProgramCreate(), cuda_piProgramCreateWithBinary(), cuda_piProgramGetBuildInfo(), cuda_piProgramGetInfo(), cuda_piProgramLink(), cuda_piProgramRelease(), cuda_piProgramRetain(), cuda_piQueueCreate(), cuda_piQueueFinish(), cuda_piQueueFlush(), cuda_piQueueGetInfo(), cuda_piQueueRelease(), cuda_piQueueRetain(), cuda_piSamplerCreate(), cuda_piSamplerGetInfo(), cuda_piSamplerRelease(), cuda_piSamplerRetain(), cuda_piTearDown(), enableCUDATracing(), piclProgramCreateWithSource(), piContextCreate(), piContextGetInfo(), piContextRelease(), piContextRetain(), piDeviceGetInfo(), piDevicePartition(), piDeviceRelease(), piDeviceRetain(), piDevicesGet(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferCopy(), piEnqueueMemBufferCopyRect(), piEnqueueMemBufferFill(), piEnqueueMemBufferMap(), piEnqueueMemBufferRead(), piEnqueueMemBufferReadRect(), piEnqueueMemBufferWrite(), piEnqueueMemBufferWriteRect(), piEnqueueMemImageCopy(), piEnqueueMemImageFill(), piEnqueueMemImageRead(), piEnqueueMemImageWrite(), piEnqueueMemUnmap(), piEnqueueNativeKernel(), piEventCreate, piEventGetInfo(), piEventGetProfilingInfo(), 
piEventRelease(), piEventRetain(), piEventSetCallback(), piEventSetStatus(), piEventsWait(), piextContextCreateWithNativeHandle(), piextContextGetNativeHandle(), piextContextSetExtendedDeleter(), piextDeviceCreateWithNativeHandle(), piextDeviceGetNativeHandle(), piextDeviceSelectBinary(), piextEnqueueDeviceGlobalVariableRead(), piextEnqueueDeviceGlobalVariableWrite(), piextEnqueueReadHostPipe(), piextEnqueueWriteHostPipe(), piextEventCreateWithNativeHandle(), piextEventGetNativeHandle(), piextGetDeviceFunctionPointer(), piextKernelCreateWithNativeHandle(), piextKernelSetArgMemObj(), piextKernelSetArgPointer(), piextKernelSetArgSampler(), piextMemCreateWithNativeHandle(), piextMemGetNativeHandle(), piextProgramCreateWithNativeHandle(), piextProgramGetNativeHandle(), piextProgramSetSpecializationConstant(), piextQueueCreate(), piextQueueCreateWithNativeHandle(), piextQueueGetNativeHandle(), piextUSMDeviceAlloc(), piextUSMEnqueueFill2D(), piextUSMEnqueueMemAdvise(), piextUSMEnqueueMemcpy(), piextUSMEnqueueMemcpy2D(), piextUSMEnqueueMemset(), piextUSMEnqueueMemset2D(), piextUSMEnqueuePrefetch(), piextUSMFree(), piextUSMGetMemAllocInfo(), piextUSMHostAlloc(), piextUSMSharedAlloc(), _pi_plugin::PiFunctionTable, piGetDeviceAndHostTimer(), piKernelCreate(), piKernelGetGroupInfo(), piKernelGetInfo(), piKernelGetSubGroupInfo(), piKernelRelease(), piKernelRetain(), piKernelSetArg(), piKernelSetExecInfo(), piMemBufferCreate(), piMemBufferPartition(), piMemGetInfo(), piMemImageCreate(), piMemImageGetInfo(), piMemRelease(), piMemRetain(), piPlatformGetInfo(), piPlatformsGet(), piPluginGetBackendOption(), piPluginGetLastError(), piProgramBuild(), piProgramCompile(), piProgramCreate(), piProgramCreateWithBinary(), piProgramGetBuildInfo(), piProgramGetInfo(), piProgramLink(), piProgramRelease(), piProgramRetain(), piQueueCreate(), piQueueFinish(), piQueueFlush(), piQueueGetInfo(), piQueueRelease(), piQueueRetain(), piSamplerCreate(), piSamplerGetInfo(), piSamplerRelease(), 
piSamplerRetain(), piTearDown(), _pi_plugin::PiVersion, _pi_plugin::PluginVersion, and SupportedVersion.

◆ splitMetadataName()

std::pair<std::string, std::string> splitMetadataName ( const std::string &  metadataName)

Definition at line 781 of file pi_cuda.cpp.

Referenced by _pi_program::set_metadata().

Variable Documentation

◆ SupportedVersion

const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING

Definition at line 5868 of file pi_cuda.cpp.

Referenced by piPluginInit().