#include <pi_cuda.hpp>
#include <sycl/detail/cuda_definitions.hpp>
#include <sycl/detail/defines.hpp>
#include <sycl/detail/pi.hpp>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cuda.h>
#include <cuda_device_runtime_api.h>
#include <limits>
#include <memory>
#include <mutex>
#include <regex>
#include <string_view>
Go to the source code of this file.
Namespaces | |
sycl | |
Error handling, matching OpenCL plugin semantics. | |
sycl::_V1 | |
sycl::_V1::detail | |
sycl::_V1::detail::pi | |
Macros | |
#define | _PI_CL(pi_api, cuda_api) (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api); |
Functions | |
void | enableCUDATracing () |
void | disableCUDATracing () |
void | sycl::_V1::detail::pi::die (const char *Message) |
void | sycl::_V1::detail::pi::cuPrint (const char *Message) |
void | sycl::_V1::detail::pi::assertion (bool Condition, const char *Message=nullptr) |
pi_result | cuda_piEnqueueEventsWait (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
Enqueues a wait on the given CUstream for all events. More... | |
pi_result | cuda_piEnqueueEventsWaitWithBarrier (pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue. More... | |
pi_result | cuda_piEventRelease (pi_event event) |
pi_result | cuda_piEventRetain (pi_event event) |
pi_result | enqueueEventWait (pi_queue queue, pi_event event) |
std::pair< std::string, std::string > | splitMetadataName (const std::string &metadataName) |
std::string | getKernelNames (pi_program) |
Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this. More... | |
pi_result | cuda_piDeviceGetInfo (pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piPlatformsGet (pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms) |
Obtains the CUDA platform. More... | |
pi_result | cuda_piPlatformGetInfo ([[maybe_unused]] pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piDevicesGet (pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices) |
pi_result | cuda_piDeviceRetain (pi_device) |
pi_result | cuda_piContextGetInfo (pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piContextRetain (pi_context context) |
pi_result | cuda_piextContextSetExtendedDeleter (pi_context context, pi_context_extended_deleter function, void *user_data) |
pi_result | cuda_piDevicePartition (pi_device, const pi_device_partition_property *, pi_uint32, pi_device *, pi_uint32 *) |
Not applicable to CUDA, devices cannot be partitioned. More... | |
pi_result | cuda_piextDeviceSelectBinary (pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary) |
pi_result | cuda_piextGetDeviceFunctionPointer ([[maybe_unused]] pi_device device, pi_program program, const char *func_name, pi_uint64 *func_pointer_ret) |
pi_result | cuda_piDeviceRelease (pi_device) |
pi_result | cuda_piextDeviceGetNativeHandle (pi_device device, pi_native_handle *nativeHandle) |
Gets the native CUDA handle of a PI device object. More... | |
pi_result | cuda_piextDeviceCreateWithNativeHandle (pi_native_handle nativeHandle, pi_platform platform, pi_device *piDevice) |
Creates a PI device object from a CUDA device handle. More... | |
pi_result | cuda_piContextCreate ([[maybe_unused]] const pi_context_properties *properties, [[maybe_unused]] pi_uint32 num_devices, const pi_device *devices, [[maybe_unused]] void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data), [[maybe_unused]] void *user_data, pi_context *retcontext) |
Create a PI CUDA context. More... | |
pi_result | cuda_piContextRelease (pi_context ctxt) |
pi_result | cuda_piextContextGetNativeHandle (pi_context context, pi_native_handle *nativeHandle) |
Gets the native CUDA handle of a PI context object. More... | |
pi_result | cuda_piextContextCreateWithNativeHandle (pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext) |
Creates a PI context object from a CUDA context handle. More... | |
pi_result | cuda_piMemBufferCreate (pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, [[maybe_unused]] const pi_mem_properties *properties) |
Creates a PI Memory object using a CUDA memory allocation. More... | |
pi_result | cuda_piMemRelease (pi_mem memObj) |
Decreases the reference count of the Mem object. More... | |
pi_result | cuda_piMemBufferPartition (pi_mem parent_buffer, pi_mem_flags flags, [[maybe_unused]] pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *memObj) |
Implements a buffer partition in the CUDA backend. More... | |
pi_result | cuda_piMemGetInfo (pi_mem, pi_mem_info, size_t, void *, size_t *) |
pi_result | cuda_piextMemGetNativeHandle (pi_mem mem, pi_native_handle *nativeHandle) |
Gets the native CUDA handle of a PI mem object. More... | |
pi_result | cuda_piextMemCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_mem *) |
Creates a PI mem object from a CUDA mem handle. More... | |
pi_result | cuda_piextMemImageCreateWithNativeHandle (pi_native_handle, pi_context, bool, const pi_image_format *, const pi_image_desc *, pi_mem *) |
Creates a PI image mem object from a CUDA image mem handle. More... | |
pi_result | cuda_piQueueCreate (pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue) |
Creates a pi_queue object on the CUDA backend. More... | |
pi_result | cuda_piextQueueCreate (pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue) |
pi_result | cuda_piQueueGetInfo (pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piQueueRetain (pi_queue command_queue) |
pi_result | cuda_piQueueRelease (pi_queue command_queue) |
pi_result | cuda_piQueueFinish (pi_queue command_queue) |
pi_result | cuda_piQueueFlush (pi_queue command_queue) |
pi_result | cuda_piextQueueGetNativeHandle (pi_queue queue, pi_native_handle *nativeHandle, int32_t *NativeHandleDesc) |
Gets the native CUDA handle of a PI queue object. More... | |
pi_result | cuda_piextQueueCreateWithNativeHandle (pi_native_handle nativeHandle, int32_t NativeHandleDesc, pi_context context, pi_device device, bool ownNativeHandle, pi_queue_properties *Properties, pi_queue *queue) |
Creates a PI queue object from a CUDA queue handle. More... | |
pi_result | cuda_piEnqueueMemBufferWrite (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemBufferRead (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEventsWait (pi_uint32 num_events, const pi_event *event_list) |
pi_result | cuda_piKernelCreate (pi_program program, const char *kernel_name, pi_kernel *kernel) |
pi_result | cuda_piKernelSetArg (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value) |
pi_result | cuda_piextKernelSetArgMemObj (pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value) |
pi_result | cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value) |
pi_result | cuda_piKernelGetGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piEnqueueKernelLaunch (pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueNativeKernel (pi_queue, void(*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *) |
TODO: Not implemented. More... | |
pi_result | cuda_piextKernelCreateWithNativeHandle (pi_native_handle, pi_context, pi_program, bool, pi_kernel *) |
pi_result | cuda_piMemImageCreate (pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem) |
TODO: Not implemented. More... | |
pi_result | cuda_piMemImageGetInfo (pi_mem, pi_image_info, size_t, void *, size_t *) |
TODO: Not implemented. More... | |
pi_result | cuda_piMemRetain (pi_mem mem) |
pi_result | cuda_piclProgramCreateWithSource (pi_context, pi_uint32, const char **, const size_t *, pi_program *) |
Not used as CUDA backend only creates programs from binary. More... | |
pi_result | cuda_piProgramBuild (pi_program program, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data) |
Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels). More... | |
pi_result | cuda_piProgramCreate (pi_context, const void *, size_t, pi_program *) |
TODO: Not implemented. More... | |
pi_result | cuda_piProgramCreateWithBinary (pi_context context, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *program) |
Loads images from a list of PTX or CUBIN binaries. More... | |
pi_result | cuda_piProgramGetInfo (pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piProgramLink (pi_context context, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data, pi_program *ret_program) |
Creates a new PI program object that is the outcome of linking all input programs. More... | |
pi_result | cuda_piProgramCompile (pi_program program, [[maybe_unused]] pi_uint32 num_devices, [[maybe_unused]] const pi_device *device_list, const char *options, [[maybe_unused]] pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, [[maybe_unused]] void(*pfn_notify)(pi_program program, void *user_data), [[maybe_unused]] void *user_data) |
Creates a new program that is the outcome of the compilation of the headers and the program. More... | |
pi_result | cuda_piProgramGetBuildInfo (pi_program program, pi_device device, pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piProgramRetain (pi_program program) |
pi_result | cuda_piProgramRelease (pi_program program) |
Decreases the reference count of a pi_program object. More... | |
pi_result | cuda_piextProgramGetNativeHandle (pi_program program, pi_native_handle *nativeHandle) |
Gets the native CUDA handle of a PI program object. More... | |
pi_result | cuda_piextProgramCreateWithNativeHandle (pi_native_handle, pi_context, bool, pi_program *) |
Creates a PI program object from a CUDA program handle. More... | |
pi_result | cuda_piKernelGetInfo (pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piKernelGetSubGroupInfo (pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piKernelRetain (pi_kernel kernel) |
pi_result | cuda_piKernelRelease (pi_kernel kernel) |
pi_result | cuda_piKernelSetExecInfo (pi_kernel, pi_kernel_exec_info, size_t, const void *) |
pi_result | cuda_piextProgramSetSpecializationConstant (pi_program, pi_uint32, size_t, const void *) |
pi_result | cuda_piextKernelSetArgPointer (pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value) |
pi_result | cuda_piEventCreate (pi_context, pi_event *) |
pi_result | cuda_piEventGetInfo (pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
pi_result | cuda_piEventGetProfilingInfo (pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
Obtains profiling information from PI CUDA events. TODO: Timings from CUDA are only elapsed time. More... | |
pi_result | cuda_piEventSetCallback (pi_event, pi_int32, pfn_notify, void *) |
pi_result | cuda_piEventSetStatus (pi_event, pi_int32) |
pi_result | cuda_piextEventGetNativeHandle (pi_event event, pi_native_handle *nativeHandle) |
Gets the native CUDA handle of a PI event object. More... | |
pi_result | cuda_piextEventCreateWithNativeHandle (pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *event) |
Creates a PI event object from a CUDA event handle. More... | |
pi_result | cuda_piSamplerCreate (pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler) |
Creates a PI sampler object. More... | |
pi_result | cuda_piSamplerGetInfo (pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
Gets information from a PI sampler object. More... | |
pi_result | cuda_piSamplerRetain (pi_sampler sampler) |
Retains a PI sampler object, incrementing its reference count. More... | |
pi_result | cuda_piSamplerRelease (pi_sampler sampler) |
Releases a PI sampler object, decrementing its reference count. More... | |
static pi_result | commonEnqueueMemBufferCopyRect (CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr, const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch) |
General 3D memory copy operation. More... | |
pi_result | cuda_piEnqueueMemBufferReadRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemBufferWriteRect (pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemBufferCopy (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemBufferCopyRect (pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemBufferFill (pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
static size_t | imageElementByteSize (CUDA_ARRAY_DESCRIPTOR array_desc) |
static pi_result | commonEnqueueMemImageNDCopy (CUstream cu_stream, pi_mem_type img_type, const size_t *region, const void *src_ptr, const CUmemorytype_enum src_type, const size_t *src_offset, void *dst_ptr, const CUmemorytype_enum dst_type, const size_t *dst_offset) |
General ND memory copy operation for images (where N > 1). More... | |
pi_result | cuda_piEnqueueMemImageRead (pi_queue command_queue, pi_mem image, pi_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemImageWrite (pi_queue command_queue, pi_mem image, pi_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemImageCopy (pi_queue command_queue, pi_mem src_image, pi_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piEnqueueMemImageFill (pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *) |
TODO: Not implemented in CUDA. More... | |
pi_result | cuda_piEnqueueMemBufferMap (pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map) |
Implements mapping on the host using a BufferRead operation. More... | |
pi_result | cuda_piEnqueueMemUnmap (pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
Implements the unmap from the host, using a BufferWrite operation. More... | |
pi_result | cuda_piextUSMHostAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment) |
USM: Implements USM Host allocations using CUDA Pinned Memory. More... | |
pi_result | cuda_piextUSMDeviceAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_device device, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment) |
USM: Implements USM device allocations using a normal CUDA device pointer. More... | |
pi_result | cuda_piextUSMSharedAlloc (void **result_ptr, pi_context context, [[maybe_unused]] pi_device device, [[maybe_unused]] pi_usm_mem_properties *properties, size_t size, [[maybe_unused]] pi_uint32 alignment) |
USM: Implements USM Shared allocations using CUDA Managed Memory. More... | |
pi_result | cuda_piextUSMFree (pi_context context, void *ptr) |
USM: Frees the given USM pointer associated with the context. More... | |
pi_result | cuda_piextUSMEnqueueMemset (pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) |
pi_result | cuda_piextUSMEnqueueMemcpy (pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) |
pi_result | cuda_piextUSMEnqueuePrefetch (pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) |
pi_result | cuda_piextUSMEnqueueMemAdvise (pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event) |
USM: memadvise API to govern behavior of automatic migration mechanisms. More... | |
pi_result | cuda_piextUSMEnqueueFill2D (pi_queue, void *, size_t, size_t, const void *, size_t, size_t, pi_uint32, const pi_event *, pi_event *) |
pi_result | cuda_piextUSMEnqueueMemset2D (pi_queue, void *, size_t, int, size_t, size_t, pi_uint32, const pi_event *, pi_event *) |
pi_result | cuda_piextUSMEnqueueMemcpy2D (pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
2D Memcpy API More... | |
pi_result | cuda_piextUSMGetMemAllocInfo (pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) |
API to query information about USM-allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value; PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if the queried pointer fell inside an allocation. More... | |
pi_result | cuda_piextEnqueueDeviceGlobalVariableWrite (pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piextEnqueueDeviceGlobalVariableRead (pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) |
pi_result | cuda_piextEnqueueReadHostPipe (pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) |
Host Pipes. More... | |
pi_result | cuda_piextEnqueueWriteHostPipe (pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) |
pi_result | cuda_piTearDown (void *) |
pi_result | cuda_piGetDeviceAndHostTimer (pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) |
pi_result | piPluginInit (pi_plugin *PluginInit) |
Variables | |
const char | SupportedVersion [] = _PI_CUDA_PLUGIN_VERSION_STRING |
Implementation of CUDA Plugin.
Definition in file pi_cuda.cpp.
#define _PI_CL | ( | pi_api, | |
cuda_api | |||
) | (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api); |
|
static |
General 3D memory copy operation.
This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is on the device, src_ptr and/or dst_ptr must be a pointer to a CUdeviceptr.
Definition at line 4417 of file pi_cuda.cpp.
References pi_buff_rect_region_struct::depth_scalar, pi_buff_rect_region_struct::height_scalar, pi_buff_rect_region_struct::width_bytes, pi_buff_rect_offset_struct::x_bytes, pi_buff_rect_offset_struct::y_scalar, and pi_buff_rect_offset_struct::z_scalar.
Referenced by cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferReadRect(), and cuda_piEnqueueMemBufferWriteRect().
|
static |
General ND memory copy operation for images (where N > 1).
This function requires the corresponding CUDA context to be at the top of the context stack. If the source and/or destination is an array, src_ptr and/or dst_ptr must be a pointer to a CUarray.
Definition at line 4789 of file pi_cuda.cpp.
References PI_MEM_TYPE_IMAGE2D, and PI_MEM_TYPE_IMAGE3D.
Referenced by cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), and cuda_piEnqueueMemImageWrite().
pi_result cuda_piclProgramCreateWithSource | ( | pi_context | , |
pi_uint32 | , | ||
const char ** | , | ||
const size_t * | , | ||
pi_program * | |||
) |
Not used as CUDA backend only creates programs from binary.
See cuda_piProgramCreateWithBinary.
Definition at line 3566 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::cuPrint().
Referenced by piPluginInit().
pi_result cuda_piContextCreate | ( | [[maybe_unused] ] const pi_context_properties * | properties, |
[[maybe_unused] ] pi_uint32 | num_devices, | ||
const pi_device * | devices, | ||
[[maybe_unused] ] void(*)(const char *errinfo, const void *private_info, size_t cb, void *user_data) | pfn_notify, | ||
[[maybe_unused] ] void * | user_data, | ||
pi_context * | retcontext | ||
) |
Create a PI CUDA context.
By default creates a scoped context and keeps the last active CUDA context on top of the CUDA context stack. With the __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE creates a primary CUDA context and activates it on the CUDA context stack.
[in] | properties | 0 terminated array of key/id-value combinations. Can be nullptr. Only accepts property key/id __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY with a pi_bool value. |
[in] | num_devices | Number of devices to create the context for. |
[in] | devices | Devices to create the context for. |
[in] | pfn_notify | Callback, currently unused. |
[in] | user_data | User data for callback. |
[out] | retcontext | Set to created context on success. |
Definition at line 2273 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piContextGetInfo | ( | pi_context | context, |
pi_context_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 1090 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_context::get_device(), _pi_context::get_reference_count(), getInfo(), PI_CONTEXT_INFO_DEVICES, PI_CONTEXT_INFO_NUM_DEVICES, PI_CONTEXT_INFO_REFERENCE_COUNT, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT, PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and setErrorMessage().
Referenced by piPluginInit().
pi_result cuda_piContextRelease | ( | pi_context | ctxt | ) |
Definition at line 2302 of file pi_cuda.cpp.
References _pi_context::decrement_reference_count(), and _pi_context::invoke_extended_deleters().
Referenced by piPluginInit(), _pi_event::~_pi_event(), _pi_kernel::~_pi_kernel(), _pi_mem::~_pi_mem(), _pi_program::~_pi_program(), and _pi_queue::~_pi_queue().
pi_result cuda_piContextRetain | ( | pi_context | context | ) |
Definition at line 1128 of file pi_cuda.cpp.
References _pi_context::get_reference_count(), and _pi_context::increment_reference_count().
Referenced by piPluginInit().
pi_result cuda_piDeviceGetInfo | ( | pi_device | device, |
pi_device_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 1206 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::assertion(), sycl::_V1::detail::pi::cuPrint(), sycl::_V1::detail::pi::die(), getInfo(), getInfoArray(), min(), PI_DEVICE_EXEC_CAPABILITIES_KERNEL, PI_DEVICE_INFO_ADDRESS_BITS, PI_DEVICE_INFO_ATOMIC_64, PI_DEVICE_INFO_AVAILABLE, PI_DEVICE_INFO_BACKEND_VERSION, PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, PI_DEVICE_INFO_BUILT_IN_KERNELS, PI_DEVICE_INFO_COMPILER_AVAILABLE, PI_DEVICE_INFO_DEVICE_ID, PI_DEVICE_INFO_DOUBLE_FP_CONFIG, PI_DEVICE_INFO_DRIVER_VERSION, PI_DEVICE_INFO_ENDIAN_LITTLE, PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, PI_DEVICE_INFO_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT, PI_DEVICE_INFO_EXTENSIONS, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_SIZE, PI_DEVICE_INFO_GPU_EU_COUNT, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, PI_DEVICE_INFO_GPU_SLICES, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, PI_DEVICE_INFO_HALF_FP_CONFIG, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_SRGB, PI_DEVICE_INFO_IMAGE_SUPPORT, PI_DEVICE_INFO_LINKER_AVAILABLE, PI_DEVICE_INFO_LOCAL_MEM_SIZE, PI_DEVICE_INFO_LOCAL_MEM_TYPE, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_MAX_COMPUTE_UNITS, PI_DEVICE_INFO_MAX_CONSTANT_ARGS, PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, PI_DEVICE_INFO_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_SAMPLERS, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, 
PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, PI_DEVICE_INFO_NAME, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_OPENCL_C_VERSION, PI_DEVICE_INFO_PARENT_DEVICE, PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, PI_DEVICE_INFO_PARTITION_PROPERTIES, PI_DEVICE_INFO_PARTITION_TYPE, PI_DEVICE_INFO_PCI_ADDRESS, PI_DEVICE_INFO_PLATFORM, PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, PI_DEVICE_INFO_PROFILE, PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES, PI_DEVICE_INFO_REFERENCE_COUNT, PI_DEVICE_INFO_SINGLE_FP_CONFIG, PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PI_DEVICE_INFO_TYPE, PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, PI_DEVICE_INFO_USM_DEVICE_SUPPORT, PI_DEVICE_INFO_USM_HOST_SUPPORT, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, PI_DEVICE_INFO_UUID, PI_DEVICE_INFO_VENDOR, PI_DEVICE_INFO_VENDOR_ID, PI_DEVICE_INFO_VERSION, PI_DEVICE_LOCAL_MEM_TYPE_LOCAL, PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE, PI_DEVICE_TYPE_GPU, PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, 
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES, PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, PI_FALSE, PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT, PI_FP_DENORM, PI_FP_FMA, PI_FP_INF_NAN, PI_FP_ROUND_TO_INF, PI_FP_ROUND_TO_NEAREST, PI_FP_ROUND_TO_ZERO, PI_MEMORY_ORDER_ACQ_REL, PI_MEMORY_ORDER_ACQUIRE, PI_MEMORY_ORDER_RELAXED, PI_MEMORY_ORDER_RELEASE, PI_MEMORY_SCOPE_DEVICE, PI_MEMORY_SCOPE_SUB_GROUP, PI_MEMORY_SCOPE_SYSTEM, PI_MEMORY_SCOPE_WORK_GROUP, PI_MEMORY_SCOPE_WORK_ITEM, PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, PI_QUEUE_FLAG_PROFILING_ENABLE, PI_TRUE, PI_USM_ACCESS, PI_USM_ATOMIC_ACCESS, PI_USM_CONCURRENT_ACCESS, and PI_USM_CONCURRENT_ATOMIC_ACCESS.
Referenced by cuda_piPlatformsGet(), and piPluginInit().
pi_result cuda_piDevicePartition | ( | pi_device | , |
const pi_device_partition_property * | , | ||
pi_uint32 | , | ||
pi_device * | , | ||
pi_uint32 * | |||
) |
Not applicable to CUDA; devices cannot be partitioned.
Definition at line 1143 of file pi_cuda.cpp.
Referenced by piPluginInit().
Definition at line 1204 of file pi_cuda.cpp.
Referenced by piPluginInit(), _pi_context::~_pi_context(), and _pi_queue::~_pi_queue().
Definition at line 1088 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piDevicesGet | ( | pi_platform | platform, |
pi_device_type | device_type, | ||
pi_uint32 | num_entries, | ||
pi_device * | devices, | ||
pi_uint32 * | num_devices | ||
) |
devices | List of devices available on the system |
num_devices | Number of elements in the list of devices. Requesting a non-GPU device triggers an error; all PI CUDA devices are GPUs. |
Definition at line 1056 of file pi_cuda.cpp.
References PI_DEVICE_TYPE_DEFAULT, and PI_DEVICE_TYPE_GPU.
Referenced by piPluginInit().
pi_result cuda_piEnqueueEventsWait | ( | pi_queue | command_queue, |
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Enqueues a wait on the given CUstream for all events.
See enqueueEventWait. TODO: Add support for multiple streams once the Event class is properly refactored.
Definition at line 4137 of file pi_cuda.cpp.
References cuda_piEnqueueEventsWaitWithBarrier().
Referenced by cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemUnmap(), and piPluginInit().
pi_result cuda_piEnqueueEventsWaitWithBarrier | ( | pi_queue | command_queue, |
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Enqueues a wait on the given CUstream for all specified events (See enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued wait will wait on all previous events in the queue.
[in] | command_queue | A valid PI queue. |
[in] | num_events_in_wait_list | Number of events in event_wait_list. |
[in] | event_wait_list | Events to wait on. |
[out] | event | Event for when all events in event_wait_list have finished or, if event_wait_list is empty, when all previous events in the queue have finished. |
Definition at line 4157 of file pi_cuda.cpp.
References _pi_queue::barrier_event_, _pi_queue::barrier_mutex_, _pi_queue::barrier_tmp_event_, _pi_queue::compute_applied_barrier_, _pi_queue::get_context(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_MARKER, _pi_queue::sync_streams(), and _pi_queue::transfer_applied_barrier_.
Referenced by cuda_piEnqueueEventsWait(), cuda_piEnqueueKernelLaunch(), and piPluginInit().
pi_result cuda_piEnqueueKernelLaunch | ( | pi_queue | command_queue, |
pi_kernel | kernel, | ||
pi_uint32 | work_dim, | ||
const size_t * | global_work_offset, | ||
const size_t * | global_work_size, | ||
const size_t * | local_work_size, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 3215 of file pi_cuda.cpp.
References cuda_piEnqueueEventsWaitWithBarrier(), _pi_queue::device_, _pi_device::get(), _pi_queue::get_context(), _pi_queue::get_device(), _pi_device::get_max_work_group_size(), _pi_device::get_max_work_item_sizes(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_NDRANGE_KERNEL, and setErrorMessage().
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferCopy | ( | pi_queue | command_queue, |
pi_mem | src_buffer, | ||
pi_mem | dst_buffer, | ||
size_t | src_offset, | ||
size_t | dst_offset, | ||
size_t | size, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4576 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_COPY.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferCopyRect | ( | pi_queue | command_queue, |
pi_mem | src_buffer, | ||
pi_mem | dst_buffer, | ||
pi_buff_rect_offset | src_origin, | ||
pi_buff_rect_offset | dst_origin, | ||
pi_buff_rect_region | region, | ||
size_t | src_row_pitch, | ||
size_t | src_slice_pitch, | ||
size_t | dst_row_pitch, | ||
size_t | dst_slice_pitch, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4620 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, commonEnqueueMemBufferCopyRect(), _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferFill | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
const void * | pattern, | ||
size_t | pattern_size, | ||
size_t | offset, | ||
size_t | size, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4665 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_FILL.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferMap | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
pi_bool | blocking_map, | ||
pi_map_flags | map_flags, | ||
size_t | offset, | ||
size_t | size, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event, | ||
void ** | ret_map | ||
) |
Implements mapping on the host using a BufferRead operation.
Mapped pointers are stored in the pi_mem object. If the buffer uses pinned host memory a pointer to that memory is returned and no read operation is done.
Definition at line 5069 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::allocMode_, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_, cuda_piEnqueueEventsWait(), cuda_piEnqueueMemBufferRead(), _pi_queue::get_context(), _pi_mem::mem_::buffer_mem_::get_map_ptr(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_::buffer_mem_::map_to_ptr(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_MEM_BUFFER_MAP, PI_MAP_READ, and PI_MAP_WRITE.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferRead | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
pi_bool | blocking_read, | ||
size_t | offset, | ||
size_t | size, | ||
void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 2906 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_READ.
Referenced by cuda_piEnqueueMemBufferMap(), and piPluginInit().
pi_result cuda_piEnqueueMemBufferReadRect | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
pi_bool | blocking_read, | ||
pi_buff_rect_offset | buffer_offset, | ||
pi_buff_rect_offset | host_offset, | ||
pi_buff_rect_region | region, | ||
size_t | buffer_row_pitch, | ||
size_t | buffer_slice_pitch, | ||
size_t | host_row_pitch, | ||
size_t | host_slice_pitch, | ||
void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4475 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, commonEnqueueMemBufferCopyRect(), _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemBufferWrite | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
pi_bool | blocking_write, | ||
size_t | offset, | ||
size_t | size, | ||
const void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 2860 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_WRITE.
Referenced by cuda_piEnqueueMemUnmap(), and piPluginInit().
pi_result cuda_piEnqueueMemBufferWriteRect | ( | pi_queue | command_queue, |
pi_mem | buffer, | ||
pi_bool | blocking_write, | ||
pi_buff_rect_offset | buffer_offset, | ||
pi_buff_rect_offset | host_offset, | ||
pi_buff_rect_region | region, | ||
size_t | buffer_row_pitch, | ||
size_t | buffer_slice_pitch, | ||
size_t | host_row_pitch, | ||
size_t | host_slice_pitch, | ||
const void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4526 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, commonEnqueueMemBufferCopyRect(), _pi_mem::mem_::buffer_mem_::get(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), _pi_mem::mem_, and PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemImageCopy | ( | pi_queue | command_queue, |
pi_mem | src_image, | ||
pi_mem | dst_image, | ||
const size_t * | src_origin, | ||
const size_t * | dst_origin, | ||
const size_t * | region, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4984 of file pi_cuda.cpp.
References commonEnqueueMemImageNDCopy(), _pi_mem::mem_::surface_mem_::get_array(), _pi_queue::get_context(), _pi_mem::mem_::surface_mem_::get_image_type(), _pi_queue::get_next_transfer_stream(), imageElementByteSize(), _pi_event::make_native(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_IMAGE_COPY, PI_MEM_TYPE_IMAGE1D, _pi_mem::surface, and _pi_mem::mem_::surface_mem_.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemImageFill | ( | pi_queue | , |
pi_mem | , | ||
const void * | , | ||
const size_t * | , | ||
const size_t * | , | ||
pi_uint32 | , | ||
const pi_event * | , | ||
pi_event * | |||
) |
\TODO Not implemented in CUDA.
Definition at line 5057 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemImageRead | ( | pi_queue | command_queue, |
pi_mem | image, | ||
pi_bool | blocking_read, | ||
const size_t * | origin, | ||
const size_t * | region, | ||
size_t | row_pitch, | ||
size_t | slice_pitch, | ||
void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4851 of file pi_cuda.cpp.
References commonEnqueueMemImageNDCopy(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), imageElementByteSize(), _pi_event::make_native(), PI_COMMAND_TYPE_IMAGE_READ, PI_MEM_TYPE_IMAGE1D, and _pi_mem::surface.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemImageWrite | ( | pi_queue | command_queue, |
pi_mem | image, | ||
pi_bool | blocking_write, | ||
const size_t * | origin, | ||
const size_t * | region, | ||
size_t | input_row_pitch, | ||
size_t | input_slice_pitch, | ||
const void * | ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 4919 of file pi_cuda.cpp.
References commonEnqueueMemImageNDCopy(), _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), imageElementByteSize(), _pi_event::make_native(), PI_COMMAND_TYPE_IMAGE_WRITE, PI_MEM_TYPE_IMAGE1D, and _pi_mem::surface.
Referenced by piPluginInit().
pi_result cuda_piEnqueueMemUnmap | ( | pi_queue | command_queue, |
pi_mem | memobj, | ||
void * | mapped_ptr, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Implements the unmap from the host, using a BufferWrite operation.
Requires the mapped pointer to be already registered in the given memobj. If memobj uses pinned host memory, this will not do a write.
Definition at line 5130 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::allocMode_, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_, cuda_piEnqueueEventsWait(), cuda_piEnqueueMemBufferWrite(), _pi_queue::get_context(), _pi_mem::mem_::buffer_mem_::get_map_flags(), _pi_mem::mem_::buffer_mem_::get_map_offset(), _pi_mem::mem_::buffer_mem_::get_map_ptr(), _pi_queue::get_next_transfer_stream(), _pi_mem::mem_::buffer_mem_::get_size(), _pi_event::make_native(), _pi_mem::mem_, _pi_mem::mem_type_, PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, PI_MAP_WRITE, PI_MAP_WRITE_INVALIDATE_REGION, and _pi_mem::mem_::buffer_mem_::unmap().
Referenced by piPluginInit().
pi_result cuda_piEnqueueNativeKernel | ( | pi_queue | , |
void(*)(void *) | , | ||
void * | , | ||
size_t | , | ||
pi_uint32 | , | ||
const pi_mem * | , | ||
const void ** | , | ||
pi_uint32 | , | ||
const pi_event * | , | ||
pi_event * | |||
) |
\TODO Not implemented
Definition at line 3379 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piEventCreate | ( | pi_context | , |
pi_event * | |||
) |
Definition at line 4017 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piEventGetInfo | ( | pi_event | event, |
pi_event_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 4021 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, getInfo(), PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, PI_EVENT_INFO_COMMAND_QUEUE, PI_EVENT_INFO_COMMAND_TYPE, PI_EVENT_INFO_CONTEXT, and PI_EVENT_INFO_REFERENCE_COUNT.
Referenced by piPluginInit().
pi_result cuda_piEventGetProfilingInfo | ( | pi_event | event, |
pi_profiling_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Obtain profiling information from PI CUDA events. \TODO Timings from CUDA are only elapsed time.
Definition at line 4052 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::die(), PI_PROFILING_INFO_COMMAND_END, PI_PROFILING_INFO_COMMAND_QUEUED, PI_PROFILING_INFO_COMMAND_START, PI_PROFILING_INFO_COMMAND_SUBMIT, PI_QUEUE_FLAG_PROFILING_ENABLE, and _pi_queue::properties_.
Referenced by piPluginInit().
Definition at line 4107 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::assertion().
Referenced by piPluginInit().
Definition at line 4095 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::assertion().
Referenced by piPluginInit().
pi_result cuda_piEventSetCallback | ( | pi_event | , |
pi_int32 | , | ||
pfn_notify | , | ||
void * | |||
) |
Definition at line 4085 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
Definition at line 4090 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
Definition at line 2953 of file pi_cuda.cpp.
References _pi_event::get_context().
Referenced by piPluginInit().
pi_result cuda_piextContextCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_uint32 | num_devices, | ||
const pi_device * | devices, | ||
bool | ownNativeHandle, | ||
pi_context * | piContext | ||
) |
Creates a PI context object from a CUDA context handle.
NOTE: The created PI object does not take ownership of the native handle.
[in] | nativeHandle | The native handle to create PI context object from. |
[out] | piContext | Set to the PI context object created from native handle. |
Definition at line 2334 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextContextGetNativeHandle | ( | pi_context | context, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native CUDA handle of a PI context object.
[in] | context | The PI context to get the native CUDA object of. |
[out] | nativeHandle | Set to the native handle of the PI context object. |
Definition at line 2321 of file pi_cuda.cpp.
References _pi_context::get().
Referenced by piPluginInit().
pi_result cuda_piextContextSetExtendedDeleter | ( | pi_context | context, |
pi_context_extended_deleter | function, | ||
void * | user_data | ||
) |
Definition at line 1136 of file pi_cuda.cpp.
References _pi_context::set_extended_deleter().
Referenced by piPluginInit().
pi_result cuda_piextDeviceCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_platform | platform, | ||
pi_device * | piDevice | ||
) |
Creates a PI device object from a CUDA device handle.
NOTE: The created PI object does not take ownership of the native handle.
[in] | nativeHandle | The native handle to create PI device object from. |
[in] | platform | is the PI platform of the device. |
[out] | piDevice | Set to the PI device object created from native handle. |
Definition at line 2205 of file pi_cuda.cpp.
References cuda_piPlatformsGet(), _pi_device::get(), and sycl::_V1::malloc().
Referenced by piPluginInit().
pi_result cuda_piextDeviceGetNativeHandle | ( | pi_device | device, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native CUDA handle of a PI device object.
[in] | device | The PI device to get the native CUDA object of. |
[out] | nativeHandle | Set to the native handle of the PI device object. |
Definition at line 2191 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextDeviceSelectBinary | ( | pi_device | device, |
pi_device_binary * | binaries, | ||
pi_uint32 | num_binaries, | ||
pi_uint32 * | selected_binary | ||
) |
Definition at line 1151 of file pi_cuda.cpp.
References __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64, and sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextEnqueueDeviceGlobalVariableRead | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | name, | ||
pi_bool | blocking_read, | ||
size_t | count, | ||
size_t | offset, | ||
void * | dst, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 5756 of file pi_cuda.cpp.
References cuda_piextUSMEnqueueMemcpy(), _pi_program::get(), and _pi_program::globalIDMD_.
Referenced by piPluginInit().
pi_result cuda_piextEnqueueDeviceGlobalVariableWrite | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | name, | ||
pi_bool | blocking_write, | ||
size_t | count, | ||
size_t | offset, | ||
const void * | src, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Definition at line 5718 of file pi_cuda.cpp.
References cuda_piextUSMEnqueueMemcpy(), _pi_program::get(), and _pi_program::globalIDMD_.
Referenced by piPluginInit().
pi_result cuda_piextEnqueueReadHostPipe | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | pipe_symbol, | ||
pi_bool | blocking, | ||
void * | ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Host Pipes.
Definition at line 5795 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextEnqueueWriteHostPipe | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | pipe_symbol, | ||
pi_bool | blocking, | ||
void * | ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Definition at line 5813 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextEventCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
bool | ownNativeHandle, | ||
pi_event * | event | ||
) |
Creates a PI event object from a CUDA event handle.
TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.
[in] | nativeHandle | The native handle to create PI event object from. |
[out] | event | Set to the PI event object created from native handle. |
Definition at line 4259 of file pi_cuda.cpp.
References _pi_event::make_with_native().
Referenced by piPluginInit().
pi_result cuda_piextEventGetNativeHandle | ( | pi_event | event, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native CUDA handle of a PI event object.
[in] | event | The PI event to get the native CUDA object of. |
[out] | nativeHandle | Set to the native handle of the PI event object. |
Definition at line 4245 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextGetDeviceFunctionPointer | ( | [[maybe_unused]] pi_device | device, |
pi_program | program, | ||
const char * | func_name, | ||
pi_uint64 * | func_pointer_ret | ||
) |
Definition at line 1179 of file pi_cuda.cpp.
References _pi_program::get(), _pi_program::get_context(), and _pi_context::get_device().
Referenced by piPluginInit().
pi_result cuda_piextKernelCreateWithNativeHandle | ( | pi_native_handle | , |
pi_context | , | ||
pi_program | , | ||
bool | , | ||
pi_kernel * | |||
) |
Definition at line 3386 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextKernelSetArgMemObj | ( | pi_kernel | kernel, |
pi_uint32 | arg_index, | ||
const pi_mem * | arg_value | ||
) |
Definition at line 3045 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), _pi_mem::mem_::surface_mem_::get_array(), _pi_mem::mem_::surface_mem_::get_surface(), _pi_mem::mem_, _pi_mem::mem_type_, setErrorMessage(), _pi_mem::surface, and _pi_mem::mem_::surface_mem_.
Referenced by piPluginInit().
pi_result cuda_piextKernelSetArgPointer | ( | pi_kernel | kernel, |
pi_uint32 | arg_index, | ||
size_t | arg_size, | ||
const void * | arg_value | ||
) |
Definition at line 4007 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextKernelSetArgSampler | ( | pi_kernel | kernel, |
pi_uint32 | arg_index, | ||
const pi_sampler * | arg_value | ||
) |
Definition at line 3086 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextMemCreateWithNativeHandle | ( | pi_native_handle | , |
pi_context | , | ||
bool | , | ||
pi_mem * | |||
) |
Creates a PI mem object from a CUDA mem handle.
TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.
[in] | nativeHandle | The native handle to create PI mem object from. |
[in] | context | The PI context of the memory allocation. |
[in] | ownNativeHandle | Indicates if we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT. |
[out] | mem | Set to the PI mem object created from native handle. |
Definition at line 2582 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextMemGetNativeHandle | ( | pi_mem | mem, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native CUDA handle of a PI mem object.
[in] | mem | The PI mem to get the native CUDA object of. |
[out] | nativeHandle | Set to the native handle of the PI mem object. |
Definition at line 2565 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::get(), and _pi_mem::mem_.
Referenced by piPluginInit().
pi_result cuda_piextMemImageCreateWithNativeHandle | ( | pi_native_handle | , |
pi_context | , | ||
bool | , | ||
const pi_image_format * | , | ||
const pi_image_desc * | , | ||
pi_mem * | |||
) |
Creates a PI image mem object from a CUDA image mem handle.
TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.
[in] | pi_native_handle | The native handle to create PI mem object from. |
[in] | pi_context | The PI context of the memory allocation. |
[in] | ownNativeHandle | Boolean indicating whether we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT. |
[in] | pi_image_format | The format of the image. |
[in] | pi_image_desc | The description information for the image. |
[out] | pi_mem | Set to the PI mem object created from native handle. |
Definition at line 2602 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
pi_result cuda_piextProgramCreateWithNativeHandle | ( | pi_native_handle | , |
pi_context | , | ||
bool | , | ||
pi_program * | |||
) |
Creates a PI program object from a CUDA program handle.
TODO: Implement this. NOTE: The created PI object takes ownership of the native handle.
[in] | nativeHandle | The native handle to create PI program object from. |
[in] | context | The PI context of the program. |
[out] | program | Set to the PI program object created from native handle. |
Definition at line 3871 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextProgramGetNativeHandle | ( | pi_program | program, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native CUDA handle of a PI program object.
[in] | program | The PI program to get the native CUDA object of. |
[out] | nativeHandle | Set to the native handle of the PI program object. |
Definition at line 3856 of file pi_cuda.cpp.
References _pi_program::get().
Referenced by piPluginInit().
pi_result cuda_piextProgramSetSpecializationConstant | ( | pi_program | , |
pi_uint32 | , | ||
size_t | , | ||
const void * | |||
) |
Definition at line 3999 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextQueueCreate | ( | pi_context | Context, |
pi_device | Device, | ||
pi_queue_properties * | Properties, | ||
pi_queue * | Queue | ||
) |
Definition at line 2661 of file pi_cuda.cpp.
References cuda_piQueueCreate(), and PI_QUEUE_FLAGS.
Referenced by piPluginInit().
pi_result cuda_piextQueueCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
int32_t | NativeHandleDesc, | ||
pi_context | context, | ||
pi_device | device, | ||
bool | ownNativeHandle, | ||
pi_queue_properties * | Properties, | ||
pi_queue * | queue | ||
) |
Creates a PI queue object from a CUDA queue handle.
NOTE: The created PI object does not take ownership of the native handle.
[in] | nativeHandle | The native handle to create PI queue object from. |
[in] | NativeHandleDesc | Info about the native handle. |
[in] | context | is the PI context of the queue. |
[out] | queue | Set to the PI queue object created from native handle. |
ownNativeHandle | tells if SYCL RT should assume the ownership of the native handle, if it can. |
Definition at line 2820 of file pi_cuda.cpp.
References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, sycl::_V1::detail::pi::die(), and _pi_context::get_device().
Referenced by piPluginInit().
pi_result cuda_piextQueueGetNativeHandle | ( | pi_queue | queue, |
pi_native_handle * | nativeHandle, | ||
int32_t * | NativeHandleDesc | ||
) |
Gets the native CUDA handle of a PI queue object.
[in] | queue | The PI queue to get the native CUDA object of. |
[in] | NativeHandleDesc | Pointer to additional native handle info. |
[out] | nativeHandle | Set to the native handle of the PI queue object. |
Definition at line 2799 of file pi_cuda.cpp.
References _pi_queue::get_context(), and _pi_queue::get_next_compute_stream().
Referenced by piPluginInit().
pi_result cuda_piextUSMDeviceAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
[[maybe_unused]] pi_device | device, | ||
[[maybe_unused]] pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
[[maybe_unused]] pi_uint32 | alignment | ||
) |
USM: Implements USM device allocations using a normal CUDA device pointer.
Definition at line 5208 of file pi_cuda.cpp.
References sycl::_V1::ext::oneapi::experimental::alignment.
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueueFill2D | ( | pi_queue | , |
void * | , | ||
size_t | , | ||
size_t | , | ||
const void * | , | ||
size_t | , | ||
size_t | , | ||
pi_uint32 | , | ||
const pi_event * | , | ||
pi_event * | |||
) |
Definition at line 5522 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueueMemAdvise | ( | pi_queue | queue, |
const void * | ptr, | ||
size_t | length, | ||
pi_mem_advice | advice, | ||
pi_event * | event | ||
) |
USM: memadvise API to govern behavior of automatic migration mechanisms.
Definition at line 5419 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die(), _pi_device::get(), _pi_queue::get_context(), _pi_context::get_device(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_USER, PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY, PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST, PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION, PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST, PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY, PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY, PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST, PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION, PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST, PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY, PI_MEM_ADVICE_RESET, and setErrorMessage().
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueueMemcpy | ( | pi_queue | queue, |
pi_bool | blocking, | ||
void * | dst_ptr, | ||
const void * | src_ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Definition at line 5320 of file pi_cuda.cpp.
References _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), and PI_COMMAND_TYPE_MEM_BUFFER_COPY.
Referenced by cuda_piextEnqueueDeviceGlobalVariableRead(), cuda_piextEnqueueDeviceGlobalVariableWrite(), and piPluginInit().
pi_result cuda_piextUSMEnqueueMemcpy2D | ( | pi_queue | queue, |
pi_bool | blocking, | ||
void * | dst_ptr, | ||
size_t | dst_pitch, | ||
const void * | src_ptr, | ||
size_t | src_pitch, | ||
size_t | width, | ||
size_t | height, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
2D Memcpy API
queue | is the queue to submit to |
blocking | is whether this operation should block the host |
dst_ptr | is the location the data will be copied to |
dst_pitch | is the total width of the destination memory including padding |
src_ptr | is the data to be copied |
src_pitch | is the total width of the source memory including padding |
width | is width in bytes of each row to be copied |
height | is the number of rows to be copied |
num_events_in_wait_list | is the number of events to wait on |
event_wait_list | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 5552 of file pi_cuda.cpp.
References _pi_queue::get_context(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), and PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT.
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueueMemset | ( | pi_queue | queue, |
void * | ptr, | ||
pi_int32 | value, | ||
size_t | count, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Definition at line 5285 of file pi_cuda.cpp.
References _pi_queue::get_context(), _pi_queue::get_next_compute_stream(), _pi_event::make_native(), and PI_COMMAND_TYPE_MEM_BUFFER_FILL.
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueueMemset2D | ( | pi_queue | , |
void * | , | ||
size_t | , | ||
int | , | ||
size_t | , | ||
size_t | , | ||
pi_uint32 | , | ||
const pi_event * | , | ||
pi_event * | |||
) |
Definition at line 5531 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piextUSMEnqueuePrefetch | ( | pi_queue | queue, |
const void * | ptr, | ||
size_t | size, | ||
pi_usm_migration_flags | flags, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Definition at line 5360 of file pi_cuda.cpp.
References _pi_queue::get_context(), _pi_context::get_device(), _pi_queue::get_next_transfer_stream(), _pi_event::make_native(), PI_COMMAND_TYPE_MEM_BUFFER_COPY, and setErrorMessage().
Referenced by piPluginInit().
pi_result cuda_piextUSMFree | ( | pi_context | context, |
void * | ptr | ||
) |
USM: Frees the given USM pointer associated with the context.
Definition at line 5258 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piextUSMGetMemAllocInfo | ( | pi_context | context, |
const void * | ptr, | ||
pi_mem_alloc_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value. PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if the queried pointer fell inside an allocation.
Result must fit in void *. PI_MEM_ALLOC_SIZE returns how big the queried pointer's allocation is in bytes. Result is a size_t. PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against.
context | is the pi_context |
ptr | is the pointer to query |
param_name | is the type of query to perform |
param_value_size | is the size of the result in bytes |
param_value | is the result |
param_value_size_ret | is how many bytes were written |
Definition at line 5620 of file pi_cuda.cpp.
References cuda_piPlatformsGet(), getInfo(), PI_MEM_ALLOC_BASE_PTR, PI_MEM_ALLOC_DEVICE, PI_MEM_ALLOC_SIZE, PI_MEM_ALLOC_TYPE, PI_MEM_TYPE_DEVICE, PI_MEM_TYPE_HOST, PI_MEM_TYPE_SHARED, and PI_MEM_TYPE_UNKNOWN.
Referenced by piPluginInit().
pi_result cuda_piextUSMHostAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
[[maybe_unused] ] pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
[[maybe_unused] ] pi_uint32 | alignment | ||
) |
USM: Implements USM Host allocations using CUDA Pinned Memory.
Definition at line 5185 of file pi_cuda.cpp.
References sycl::_V1::ext::oneapi::experimental::alignment.
Referenced by piPluginInit().
pi_result cuda_piextUSMSharedAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
[[maybe_unused] ] pi_device | device, | ||
[[maybe_unused] ] pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
[[maybe_unused] ] pi_uint32 | alignment | ||
) |
USM: Implements USM Shared allocations using CUDA Managed Memory.
Definition at line 5233 of file pi_cuda.cpp.
References sycl::_V1::ext::oneapi::experimental::alignment.
Referenced by piPluginInit().
pi_result cuda_piGetDeviceAndHostTimer | ( | pi_device | Device, |
uint64_t * | DeviceTime, | ||
uint64_t * | HostTime | ||
) |
Definition at line 5843 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piKernelCreate | ( | pi_program | program, |
const char * | kernel_name, | ||
pi_kernel * | kernel | ||
) |
Definition at line 2988 of file pi_cuda.cpp.
References _pi_program::get(), and _pi_program::get_context().
Referenced by piPluginInit().
pi_result cuda_piKernelGetGroupInfo | ( | pi_kernel | kernel, |
pi_device | device, | ||
pi_kernel_group_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 3102 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::assertion(), getInfo(), getInfoArray(), PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE, PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE, PI_KERNEL_GROUP_INFO_NUM_REGS, PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE, and PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE.
Referenced by piPluginInit().
pi_result cuda_piKernelGetInfo | ( | pi_kernel | kernel, |
pi_kernel_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 3878 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, getInfo(), PI_KERNEL_INFO_ATTRIBUTES, PI_KERNEL_INFO_CONTEXT, PI_KERNEL_INFO_FUNCTION_NAME, PI_KERNEL_INFO_NUM_ARGS, PI_KERNEL_INFO_PROGRAM, and PI_KERNEL_INFO_REFERENCE_COUNT.
Referenced by piPluginInit().
pi_result cuda_piKernelGetSubGroupInfo | ( | pi_kernel | kernel, |
pi_device | device, | ||
pi_kernel_sub_group_info | param_name, | ||
size_t | input_value_size, | ||
const void * | input_value, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 3914 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::assertion(), getInfo(), PI_KERNEL_COMPILE_NUM_SUB_GROUPS, PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, PI_KERNEL_MAX_NUM_SUB_GROUPS, and PI_KERNEL_MAX_SUB_GROUP_SIZE.
Referenced by piPluginInit().
Definition at line 3975 of file pi_cuda.cpp.
Referenced by piPluginInit().
Definition at line 3967 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piKernelSetArg | ( | pi_kernel | kernel, |
pi_uint32 | arg_index, | ||
size_t | arg_size, | ||
const void * | arg_value | ||
) |
Definition at line 3028 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piKernelSetExecInfo | ( | pi_kernel | , |
pi_kernel_exec_info | , | ||
size_t | , | ||
const void * | |||
) |
Definition at line 3994 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piMemBufferCreate | ( | pi_context | context, |
pi_mem_flags | flags, | ||
size_t | size, | ||
void * | host_ptr, | ||
pi_mem * | ret_mem, | ||
[[maybe_unused] ] const pi_mem_properties * | properties | ||
) |
Creates a PI Memory object using a CUDA memory allocation.
Can trigger a manual copy depending on the mode. \TODO Implement USE_HOST_PTR using cuHostRegister
Definition at line 2355 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, PI_MEM_FLAGS_HOST_PTR_ALLOC, PI_MEM_FLAGS_HOST_PTR_COPY, PI_MEM_FLAGS_HOST_PTR_USE, and _pi_mem::mem_::buffer_mem_::use_host_ptr.
Referenced by piPluginInit().
pi_result cuda_piMemBufferPartition | ( | pi_mem | parent_buffer, |
pi_mem_flags | flags, | ||
[[maybe_unused] ] pi_buffer_create_type | buffer_create_type, | ||
void * | buffer_create_info, | ||
pi_mem * | memObj | ||
) |
Implements a buffer partition in the CUDA backend.
A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented as an offset over an existing CUDA allocation.
Definition at line 2495 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::context_, _pi_mem::mem_::buffer_mem_::get_size(), _pi_mem::mem_::buffer_mem_::hostPtr_, _pi_mem::is_buffer(), _pi_mem::is_sub_buffer(), _pi_mem::mem_, PI_BUFFER_CREATE_TYPE_REGION, PI_MEM_FLAGS_ACCESS_RW, and _pi_mem::mem_::buffer_mem_::ptr_.
Referenced by piPluginInit().
pi_result cuda_piMemGetInfo | ( | pi_mem | , |
pi_mem_info | , | ||
size_t | , | ||
void * | , | ||
size_t * | |||
) |
Definition at line 2555 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piMemImageCreate | ( | pi_context | context, |
pi_mem_flags | flags, | ||
const pi_image_format * | image_format, | ||
const pi_image_desc * | image_desc, | ||
void * | host_ptr, | ||
pi_mem * | ret_mem | ||
) |
\TODO Not implemented
Definition at line 3394 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die(), _pi_image_desc::image_depth, _pi_image_desc::image_height, _pi_image_desc::image_type, _pi_image_desc::image_width, PI_IMAGE_CHANNEL_ORDER_RGBA, PI_IMAGE_CHANNEL_TYPE_FLOAT, PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT, PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16, PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32, PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8, PI_IMAGE_CHANNEL_TYPE_UNORM_INT16, PI_IMAGE_CHANNEL_TYPE_UNORM_INT8, PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, PI_MEM_FLAGS_HOST_PTR_COPY, PI_MEM_FLAGS_HOST_PTR_USE, PI_MEM_TYPE_IMAGE1D, PI_MEM_TYPE_IMAGE2D, and PI_MEM_TYPE_IMAGE3D.
Referenced by piPluginInit().
pi_result cuda_piMemImageGetInfo | ( | pi_mem | , |
pi_image_info | , | ||
size_t | , | ||
void * | , | ||
size_t * | |||
) |
\TODO Not implemented
Definition at line 3550 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
Decreases the reference count of the Mem object.
If the reference count reaches zero, calls the relevant CUDA Free function.
Definition at line 2431 of file pi_cuda.cpp.
References _pi_mem::mem_::buffer_mem_::alloc_host_ptr, _pi_mem::buffer, _pi_mem::mem_::buffer_mem_::classic, _pi_mem::mem_::buffer_mem_::copy_in, _pi_mem::decrement_reference_count(), sycl::_V1::detail::pi::die(), _pi_mem::is_sub_buffer(), _pi_mem::mem_type_, _pi_mem::surface, and _pi_mem::mem_::buffer_mem_::use_host_ptr.
Referenced by piPluginInit(), and _pi_mem::~_pi_mem().
Definition at line 3556 of file pi_cuda.cpp.
References _pi_mem::get_reference_count(), and _pi_mem::increment_reference_count().
Referenced by piPluginInit().
pi_result cuda_piPlatformGetInfo | ( | [[maybe_unused] ] pi_platform | platform, |
pi_platform_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 1015 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, sycl::_V1::detail::pi::die(), getInfo(), PI_EXT_PLATFORM_BACKEND_CUDA, PI_EXT_PLATFORM_INFO_BACKEND, PI_PLATFORM_INFO_EXTENSIONS, PI_PLATFORM_INFO_NAME, PI_PLATFORM_INFO_PROFILE, PI_PLATFORM_INFO_VENDOR, and PI_PLATFORM_INFO_VERSION.
Referenced by piPluginInit().
pi_result cuda_piPlatformsGet | ( | pi_uint32 | num_entries, |
pi_platform * | platforms, | ||
pi_uint32 * | num_platforms | ||
) |
Obtains the CUDA platform.
There is only one CUDA platform, and it contains all devices on the system. Triggers the CUDA Driver initialization (cuInit) the first time, so this must be the first PI API called.
However, because multiple devices in a context are not currently supported, each device is placed in a separate platform.
Definition at line 908 of file pi_cuda.cpp.
References cuda_piDeviceGetInfo(), PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, and PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES.
Referenced by cuda_piextDeviceCreateWithNativeHandle(), cuda_piextUSMGetMemAllocInfo(), and piPluginInit().
pi_result cuda_piProgramBuild | ( | pi_program | program, |
[[maybe_unused] ] pi_uint32 | num_devices, | ||
[[maybe_unused] ] const pi_device * | device_list, | ||
const char * | options, | ||
[[maybe_unused] ] void(*)(pi_program program, void *user_data) | pfn_notify, | ||
[[maybe_unused] ] void * | user_data | ||
) |
Loads the images from a PI program into a CUmodule that can be used later on to extract functions (kernels).
See _pi_program for implementation details.
Definition at line 3576 of file pi_cuda.cpp.
References _pi_program::build_program(), and _pi_program::get_context().
Referenced by piPluginInit().
pi_result cuda_piProgramCompile | ( | pi_program | program, |
[[maybe_unused] ] pi_uint32 | num_devices, | ||
[[maybe_unused] ] const pi_device * | device_list, | ||
const char * | options, | ||
[[maybe_unused] ] pi_uint32 | num_input_headers, | ||
const pi_program * | input_headers, | ||
const char ** | header_include_names, | ||
[[maybe_unused] ] void(*)(pi_program program, void *user_data) | pfn_notify, | ||
[[maybe_unused] ] void * | user_data | ||
) |
Creates a new program that is the outcome of the compilation of the headers and the program.
\TODO Implement asynchronous compilation
Definition at line 3754 of file pi_cuda.cpp.
References _pi_program::build_program(), and _pi_program::get_context().
Referenced by piPluginInit().
pi_result cuda_piProgramCreate | ( | pi_context | , |
const void * | , | ||
size_t | , | ||
pi_program * | |||
) |
\TODO Not implemented
Definition at line 3601 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by piPluginInit().
pi_result cuda_piProgramCreateWithBinary | ( | pi_context | context, |
[[maybe_unused] ] pi_uint32 | num_devices, | ||
[[maybe_unused] ] const pi_device * | device_list, | ||
const size_t * | lengths, | ||
const unsigned char ** | binaries, | ||
size_t | num_metadata_entries, | ||
const pi_device_binary_property * | metadata, | ||
pi_int32 * | binary_status, | ||
pi_program * | program | ||
) |
Loads images from a list of PTX or CUBIN binaries.
Note: No calls to CUDA driver API in this function, only store binaries for later.
Note: Only supports one device
Definition at line 3612 of file pi_cuda.cpp.
References _pi_device::get(), and _pi_context::get_device().
Referenced by piPluginInit().
pi_result cuda_piProgramGetBuildInfo | ( | pi_program | program, |
pi_device | device, | ||
pi_program_build_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 3784 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_program::buildOptions_, _pi_program::buildStatus_, sycl::_V1::detail::pi::die(), getInfo(), getInfoArray(), _pi_program::infoLog_, _pi_program::MAX_LOG_SIZE, PI_PROGRAM_BUILD_INFO_LOG, PI_PROGRAM_BUILD_INFO_OPTIONS, and PI_PROGRAM_BUILD_INFO_STATUS.
Referenced by piPluginInit().
pi_result cuda_piProgramGetInfo | ( | pi_program | program, |
pi_program_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 3650 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_program::binary_, _pi_program::binarySizeInBytes_, _pi_program::context_, _pi_context::deviceId_, sycl::_V1::detail::pi::die(), _pi_program::get_reference_count(), getInfo(), getInfoArray(), getKernelNames(), PI_PROGRAM_INFO_BINARIES, PI_PROGRAM_INFO_BINARY_SIZES, PI_PROGRAM_INFO_CONTEXT, PI_PROGRAM_INFO_DEVICES, PI_PROGRAM_INFO_KERNEL_NAMES, PI_PROGRAM_INFO_NUM_DEVICES, PI_PROGRAM_INFO_REFERENCE_COUNT, and PI_PROGRAM_INFO_SOURCE.
Referenced by piPluginInit().
pi_result cuda_piProgramLink | ( | pi_context | context, |
[[maybe_unused] ] pi_uint32 | num_devices, | ||
[[maybe_unused] ] const pi_device * | device_list, | ||
const char * | options, | ||
pi_uint32 | num_input_programs, | ||
const pi_program * | input_programs, | ||
[[maybe_unused] ] void(*)(pi_program program, void *user_data) | pfn_notify, | ||
[[maybe_unused] ] void * | user_data, | ||
pi_program * | ret_program | ||
) |
Creates a new PI program object that is the outcome of linking all input programs.
\TODO Implement linker options, requires mapping of OpenCL to CUDA
Definition at line 3691 of file pi_cuda.cpp.
References _pi_program::binary_, and _pi_program::binarySizeInBytes_.
Referenced by piPluginInit().
pi_result cuda_piProgramRelease | ( | pi_program | program | ) |
Decreases the reference count of a pi_program object.
When the reference count reaches 0, it unloads the module from the context.
Definition at line 3821 of file pi_cuda.cpp.
References _pi_program::decrement_reference_count(), _pi_program::get(), _pi_program::get_context(), and _pi_program::get_reference_count().
Referenced by piPluginInit(), and _pi_kernel::~_pi_kernel().
pi_result cuda_piProgramRetain | ( | pi_program | program | ) |
Definition at line 3811 of file pi_cuda.cpp.
References _pi_program::get_reference_count(), and _pi_program::increment_reference_count().
Referenced by piPluginInit().
pi_result cuda_piQueueCreate | ( | pi_context | context, |
pi_device | device, | ||
pi_queue_properties | properties, | ||
pi_queue * | queue | ||
) |
Creates a pi_queue object on the CUDA backend.
Valid properties
Definition at line 2618 of file pi_cuda.cpp.
References __SYCL_PI_CUDA_SYNC_WITH_DEFAULT, __SYCL_PI_CUDA_USE_DEFAULT_STREAM, _pi_queue::default_num_compute_streams, _pi_queue::default_num_transfer_streams, _pi_context::get_device(), and PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE.
Referenced by cuda_piextQueueCreate(), and piPluginInit().
Definition at line 2759 of file pi_cuda.cpp.
References _pi_queue::get_context(), and _pi_queue::sync_streams().
Referenced by piPluginInit().
Definition at line 2787 of file pi_cuda.cpp.
Referenced by piPluginInit().
pi_result cuda_piQueueGetInfo | ( | pi_queue | command_queue, |
pi_queue_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Definition at line 2677 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_queue::all_of(), _pi_queue::context_, _pi_queue::device_, sycl::_V1::detail::pi::die(), _pi_queue::get_reference_count(), getInfo(), PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, PI_QUEUE_INFO_CONTEXT, PI_QUEUE_INFO_DEVICE, PI_QUEUE_INFO_PROPERTIES, PI_QUEUE_INFO_REFERENCE_COUNT, and _pi_queue::properties_.
Referenced by piPluginInit().
Definition at line 2731 of file pi_cuda.cpp.
References _pi_queue::backend_has_ownership(), _pi_queue::decrement_reference_count(), _pi_queue::for_each_stream(), and _pi_queue::get_context().
Referenced by piPluginInit(), and _pi_event::~_pi_event().
Definition at line 2723 of file pi_cuda.cpp.
References _pi_queue::get_reference_count(), and _pi_queue::increment_reference_count().
Referenced by piPluginInit().
pi_result cuda_piSamplerCreate | ( | pi_context | context, |
const pi_sampler_properties * | sampler_properties, | ||
pi_sampler * | result_sampler | ||
) |
Creates a PI sampler object.
[in] | context | The context the sampler is created for. |
[in] | sampler_properties | The properties for the sampler. |
[out] | result_sampler | Set to the resulting sampler object. |
Definition at line 4283 of file pi_cuda.cpp.
References PI_SAMPLER_ADDRESSING_MODE_CLAMP, PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, PI_SAMPLER_PROPERTIES_FILTER_MODE, PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, and PI_TRUE.
Referenced by piPluginInit().
pi_result cuda_piSamplerGetInfo | ( | pi_sampler | sampler, |
pi_sampler_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
Gets information from a PI sampler object.
[in] | sampler | The sampler to get the information from. |
[in] | param_name | The name of the information to get. |
[in] | param_value_size | The size of the param_value. |
[out] | param_value | Set to information value. |
[out] | param_value_size_ret | Set to the size of the information value. |
Definition at line 4342 of file pi_cuda.cpp.
References __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME, _pi_sampler::context_, _pi_sampler::get_reference_count(), getInfo(), PI_SAMPLER_ADDRESSING_MODE_NONE, PI_SAMPLER_FILTER_MODE_NEAREST, PI_SAMPLER_INFO_ADDRESSING_MODE, PI_SAMPLER_INFO_CONTEXT, PI_SAMPLER_INFO_FILTER_MODE, PI_SAMPLER_INFO_NORMALIZED_COORDS, PI_SAMPLER_INFO_REFERENCE_COUNT, and _pi_sampler::props_.
Referenced by piPluginInit().
pi_result cuda_piSamplerRelease | ( | pi_sampler | sampler | ) |
Releases a PI sampler object, decrementing its reference count.
If the reference count reaches zero, the sampler object is destroyed.
[in] | sampler | The sampler to decrement the reference count of. |
Definition at line 4395 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::assertion(), _pi_sampler::decrement_reference_count(), and _pi_sampler::get_reference_count().
Referenced by piPluginInit().
pi_result cuda_piSamplerRetain | ( | pi_sampler | sampler | ) |
Retains a PI sampler object, incrementing its reference count.
[in] | sampler | The sampler to increment the reference count of. |
Definition at line 4383 of file pi_cuda.cpp.
References _pi_sampler::increment_reference_count().
Referenced by piPluginInit().
pi_result cuda_piTearDown | ( | void * | ) |
Definition at line 5838 of file pi_cuda.cpp.
References disableCUDATracing().
Referenced by piPluginInit().
void disableCUDATracing | ( | ) |
Definition at line 103 of file tracing.cpp.
References CUDA_CALL_STREAM_NAME, and CUDA_DEBUG_STREAM_NAME.
Referenced by cuda_piTearDown().
void enableCUDATracing | ( | ) |
Definition at line 72 of file tracing.cpp.
References CUDA_CALL_STREAM_NAME, CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, and GVerStr.
Referenced by piPluginInit().
Definition at line 762 of file pi_cuda.cpp.
References _pi_queue::for_each_stream().
std::string getKernelNames | ( | pi_program | ) |
Finds kernel names by searching for entry points in the PTX source, as the CUDA driver API doesn't expose an operation for this.
Note: This is currently only being used by the SYCL program class for the has_kernel method, so an alternative would be to move the has_kernel query to PI and use cuModuleGetFunction to check for a kernel. Note: Another alternative is to add kernel names as metadata, like with reqd_work_group_size.
Definition at line 888 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by cuda_piProgramGetInfo().
|
static |
Definition at line 4765 of file pi_cuda.cpp.
References sycl::_V1::detail::pi::die().
Referenced by cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), and cuda_piEnqueueMemImageWrite().
Definition at line 5870 of file pi_cuda.cpp.
References _PI_CL, _PI_PLUGIN_VERSION_CHECK, cuda_piclProgramCreateWithSource(), cuda_piContextCreate(), cuda_piContextGetInfo(), cuda_piContextRelease(), cuda_piContextRetain(), cuda_piDeviceGetInfo(), cuda_piDevicePartition(), cuda_piDeviceRelease(), cuda_piDeviceRetain(), cuda_piDevicesGet(), cuda_piEnqueueEventsWait(), cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageFill(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piEnqueueNativeKernel(), cuda_piEventCreate(), cuda_piEventGetInfo(), cuda_piEventGetProfilingInfo(), cuda_piEventRelease(), cuda_piEventRetain(), cuda_piEventSetCallback(), cuda_piEventSetStatus(), cuda_piEventsWait(), cuda_piextContextCreateWithNativeHandle(), cuda_piextContextGetNativeHandle(), cuda_piextContextSetExtendedDeleter(), cuda_piextDeviceCreateWithNativeHandle(), cuda_piextDeviceGetNativeHandle(), cuda_piextDeviceSelectBinary(), cuda_piextEnqueueDeviceGlobalVariableRead(), cuda_piextEnqueueDeviceGlobalVariableWrite(), cuda_piextEnqueueReadHostPipe(), cuda_piextEnqueueWriteHostPipe(), cuda_piextEventCreateWithNativeHandle(), cuda_piextEventGetNativeHandle(), cuda_piextGetDeviceFunctionPointer(), cuda_piextKernelCreateWithNativeHandle(), cuda_piextKernelSetArgMemObj(), cuda_piextKernelSetArgPointer(), cuda_piextKernelSetArgSampler(), cuda_piextMemCreateWithNativeHandle(), cuda_piextMemGetNativeHandle(), cuda_piextProgramCreateWithNativeHandle(), cuda_piextProgramGetNativeHandle(), cuda_piextProgramSetSpecializationConstant(), cuda_piextQueueCreate(), cuda_piextQueueCreateWithNativeHandle(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMDeviceAlloc(), 
cuda_piextUSMEnqueueFill2D(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueueMemset(), cuda_piextUSMEnqueueMemset2D(), cuda_piextUSMEnqueuePrefetch(), cuda_piextUSMFree(), cuda_piextUSMGetMemAllocInfo(), cuda_piextUSMHostAlloc(), cuda_piextUSMSharedAlloc(), cuda_piGetDeviceAndHostTimer(), cuda_piKernelCreate(), cuda_piKernelGetGroupInfo(), cuda_piKernelGetInfo(), cuda_piKernelGetSubGroupInfo(), cuda_piKernelRelease(), cuda_piKernelRetain(), cuda_piKernelSetArg(), cuda_piKernelSetExecInfo(), cuda_piMemBufferCreate(), cuda_piMemBufferPartition(), cuda_piMemGetInfo(), cuda_piMemImageCreate(), cuda_piMemImageGetInfo(), cuda_piMemRelease(), cuda_piMemRetain(), cuda_piPlatformGetInfo(), cuda_piPlatformsGet(), cuda_piProgramBuild(), cuda_piProgramCompile(), cuda_piProgramCreate(), cuda_piProgramCreateWithBinary(), cuda_piProgramGetBuildInfo(), cuda_piProgramGetInfo(), cuda_piProgramLink(), cuda_piProgramRelease(), cuda_piProgramRetain(), cuda_piQueueCreate(), cuda_piQueueFinish(), cuda_piQueueFlush(), cuda_piQueueGetInfo(), cuda_piQueueRelease(), cuda_piQueueRetain(), cuda_piSamplerCreate(), cuda_piSamplerGetInfo(), cuda_piSamplerRelease(), cuda_piSamplerRetain(), cuda_piTearDown(), enableCUDATracing(), piclProgramCreateWithSource(), piContextCreate(), piContextGetInfo(), piContextRelease(), piContextRetain(), piDeviceGetInfo(), piDevicePartition(), piDeviceRelease(), piDeviceRetain(), piDevicesGet(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferCopy(), piEnqueueMemBufferCopyRect(), piEnqueueMemBufferFill(), piEnqueueMemBufferMap(), piEnqueueMemBufferRead(), piEnqueueMemBufferReadRect(), piEnqueueMemBufferWrite(), piEnqueueMemBufferWriteRect(), piEnqueueMemImageCopy(), piEnqueueMemImageFill(), piEnqueueMemImageRead(), piEnqueueMemImageWrite(), piEnqueueMemUnmap(), piEnqueueNativeKernel(), piEventCreate, piEventGetInfo(), piEventGetProfilingInfo(), 
piEventRelease(), piEventRetain(), piEventSetCallback(), piEventSetStatus(), piEventsWait(), piextContextCreateWithNativeHandle(), piextContextGetNativeHandle(), piextContextSetExtendedDeleter(), piextDeviceCreateWithNativeHandle(), piextDeviceGetNativeHandle(), piextDeviceSelectBinary(), piextEnqueueDeviceGlobalVariableRead(), piextEnqueueDeviceGlobalVariableWrite(), piextEnqueueReadHostPipe(), piextEnqueueWriteHostPipe(), piextEventCreateWithNativeHandle(), piextEventGetNativeHandle(), piextGetDeviceFunctionPointer(), piextKernelCreateWithNativeHandle(), piextKernelSetArgMemObj(), piextKernelSetArgPointer(), piextKernelSetArgSampler(), piextMemCreateWithNativeHandle(), piextMemGetNativeHandle(), piextProgramCreateWithNativeHandle(), piextProgramGetNativeHandle(), piextProgramSetSpecializationConstant(), piextQueueCreate(), piextQueueCreateWithNativeHandle(), piextQueueGetNativeHandle(), piextUSMDeviceAlloc(), piextUSMEnqueueFill2D(), piextUSMEnqueueMemAdvise(), piextUSMEnqueueMemcpy(), piextUSMEnqueueMemcpy2D(), piextUSMEnqueueMemset(), piextUSMEnqueueMemset2D(), piextUSMEnqueuePrefetch(), piextUSMFree(), piextUSMGetMemAllocInfo(), piextUSMHostAlloc(), piextUSMSharedAlloc(), _pi_plugin::PiFunctionTable, piGetDeviceAndHostTimer(), piKernelCreate(), piKernelGetGroupInfo(), piKernelGetInfo(), piKernelGetSubGroupInfo(), piKernelRelease(), piKernelRetain(), piKernelSetArg(), piKernelSetExecInfo(), piMemBufferCreate(), piMemBufferPartition(), piMemGetInfo(), piMemImageCreate(), piMemImageGetInfo(), piMemRelease(), piMemRetain(), piPlatformGetInfo(), piPlatformsGet(), piPluginGetBackendOption(), piPluginGetLastError(), piProgramBuild(), piProgramCompile(), piProgramCreate(), piProgramCreateWithBinary(), piProgramGetBuildInfo(), piProgramGetInfo(), piProgramLink(), piProgramRelease(), piProgramRetain(), piQueueCreate(), piQueueFinish(), piQueueFlush(), piQueueGetInfo(), piQueueRelease(), piQueueRetain(), piSamplerCreate(), piSamplerGetInfo(), piSamplerRelease(), 
piSamplerRetain(), piTearDown(), _pi_plugin::PiVersion, _pi_plugin::PluginVersion, and SupportedVersion.
std::pair<std::string, std::string> splitMetadataName | ( | const std::string & | metadataName | ) |
Definition at line 781 of file pi_cuda.cpp.
Referenced by _pi_program::set_metadata().
const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING |
Definition at line 5868 of file pi_cuda.cpp.
Referenced by piPluginInit().