Implementation of CUDA Plugin. More...
#include <pi_cuda.hpp>
#include <sycl/detail/cuda_definitions.hpp>
#include <sycl/detail/defines.hpp>
#include <sycl/detail/pi.hpp>
#include <sycl/detail/pi.def>
Go to the source code of this file.
Macros | |
#define | _PI_API(api) (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); |
Functions | |
void | enableCUDATracing (cuda_tracing_context_t_ *ctx) |
void | disableCUDATracing (cuda_tracing_context_t_ *ctx) |
cuda_tracing_context_t_ * | createCUDATracingContext () |
void | freeCUDATracingContext (cuda_tracing_context_t_ *Ctx) |
pi_result | piPlatformsGet (pi_uint32 NumEntries, pi_platform *Platforms, pi_uint32 *NumPlatforms) |
pi_result | piPlatformGetInfo (pi_platform Platform, pi_platform_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piextPlatformGetNativeHandle (pi_platform Platform, pi_native_handle *NativeHandle) |
Gets the native handle of a PI platform object. More... | |
pi_result | piextPlatformCreateWithNativeHandle (pi_native_handle NativeHandle, pi_platform *Platform) |
Creates PI platform object from a native handle. More... | |
pi_result | piPluginGetLastError (char **message) |
API to get Plugin specific warning and error messages. More... | |
pi_result | piPluginGetBackendOption (pi_platform platform, const char *frontend_option, const char **backend_option) |
API to get backend specific option. More... | |
pi_result | piDevicesGet (pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) |
pi_result | piDeviceRetain (pi_device Device) |
pi_result | piDeviceRelease (pi_device Device) |
pi_result | piDeviceGetInfo (pi_device Device, pi_device_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
Returns the requested info for the provided native device. Returns PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for the PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts. More... | |
pi_result | piDevicePartition (pi_device Device, const pi_device_partition_property *Properties, pi_uint32 NumDevices, pi_device *OutDevices, pi_uint32 *OutNumDevices) |
pi_result | piextDeviceSelectBinary (pi_device Device, pi_device_binary *Binaries, pi_uint32 NumBinaries, pi_uint32 *SelectedBinaryInd) |
Selects the most appropriate device binary based on runtime information and the IR characteristics. More... | |
pi_result | piextDeviceGetNativeHandle (pi_device Device, pi_native_handle *NativeHandle) |
Gets the native handle of a PI device object. More... | |
pi_result | piextDeviceCreateWithNativeHandle (pi_native_handle NativeHandle, pi_platform Platform, pi_device *Device) |
Creates PI device object from a native handle. More... | |
pi_result | piContextCreate (const pi_context_properties *Properties, pi_uint32 NumDevices, const pi_device *Devices, void(*PFnNotify)(const char *ErrInfo, const void *PrivateInfo, size_t CB, void *UserData), void *UserData, pi_context *RetContext) |
pi_result | piContextGetInfo (pi_context Context, pi_context_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piextContextSetExtendedDeleter (pi_context Context, pi_context_extended_deleter Function, void *UserData) |
pi_result | piextContextGetNativeHandle (pi_context Context, pi_native_handle *NativeHandle) |
Gets the native handle of a PI context object. More... | |
pi_result | piextContextCreateWithNativeHandle (pi_native_handle NativeHandle, pi_uint32 NumDevices, const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) |
Creates PI context object from a native handle. More... | |
pi_result | piContextRetain (pi_context Context) |
pi_result | piContextRelease (pi_context Context) |
pi_result | piQueueCreate (pi_context Context, pi_device Device, pi_queue_properties Flags, pi_queue *Queue) |
pi_result | piextQueueCreate (pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue) |
pi_result | piQueueGetInfo (pi_queue Queue, pi_queue_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piQueueRetain (pi_queue Queue) |
pi_result | piQueueRelease (pi_queue Queue) |
pi_result | piQueueFinish (pi_queue Queue) |
pi_result | piQueueFlush (pi_queue Queue) |
pi_result | piextQueueGetNativeHandle (pi_queue Queue, pi_native_handle *NativeHandle, int32_t *NativeHandleDesc) |
Gets the native handle of a PI queue object. More... | |
pi_result | piextQueueCreateWithNativeHandle (pi_native_handle NativeHandle, int32_t NativeHandleDesc, pi_context Context, pi_device Device, bool OwnNativeHandle, pi_queue_properties *Properties, pi_queue *Queue) |
Creates PI queue object from a native handle. More... | |
pi_result | piMemBufferCreate (pi_context Context, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *properties) |
pi_result | piMemGetInfo (pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piMemRetain (pi_mem Mem) |
pi_result | piMemRelease (pi_mem Mem) |
pi_result | piMemImageCreate (pi_context Context, pi_mem_flags Flags, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, void *HostPtr, pi_mem *RetImage) |
pi_result | piextMemGetNativeHandle (pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) |
Gets the native handle of a PI mem object. More... | |
pi_result | piextMemCreateWithNativeHandle (pi_native_handle NativeHandle, pi_context Context, bool ownNativeHandle, pi_mem *Mem) |
Creates PI mem object from a native handle. More... | |
pi_result | piProgramCreate (pi_context Context, const void *ILBytes, size_t Length, pi_program *Program) |
pi_result | piProgramCreateWithBinary (pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, const size_t *Lengths, const unsigned char **Binaries, size_t NumMetadataEntries, const pi_device_binary_property *Metadata, pi_int32 *BinaryStatus, pi_program *Program) |
Creates a PI program for a context and loads the given binary into it. More... | |
pi_result | piextMemImageCreateWithNativeHandle (pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, pi_mem *Img) |
Creates PI image object from a native handle. More... | |
pi_result | piProgramGetInfo (pi_program Program, pi_program_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piProgramLink (pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, const char *Options, pi_uint32 NumInputPrograms, const pi_program *InputPrograms, void(*PFnNotify)(pi_program Program, void *UserData), void *UserData, pi_program *RetProgram) |
pi_result | piProgramCompile (pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, const char *Options, pi_uint32 NumInputHeaders, const pi_program *InputHeaders, const char **HeaderIncludeNames, void(*PFnNotify)(pi_program Program, void *UserData), void *UserData) |
pi_result | piProgramBuild (pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, const char *Options, void(*PFnNotify)(pi_program Program, void *UserData), void *UserData) |
pi_result | piProgramGetBuildInfo (pi_program Program, pi_device Device, pi_program_build_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piProgramRetain (pi_program Program) |
pi_result | piProgramRelease (pi_program Program) |
pi_result | piextProgramGetNativeHandle (pi_program Program, pi_native_handle *NativeHandle) |
Gets the native handle of a PI program object. More... | |
pi_result | piextProgramCreateWithNativeHandle (pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, pi_program *Program) |
Creates PI program object from a native handle. More... | |
pi_result | piKernelCreate (pi_program Program, const char *KernelName, pi_kernel *RetKernel) |
pi_result | piKernelSetArg (pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, const void *ArgValue) |
pi_result | piextKernelSetArgMemObj (pi_kernel Kernel, pi_uint32 ArgIndex, const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) |
pi_result | piextKernelSetArgSampler (pi_kernel Kernel, pi_uint32 ArgIndex, const pi_sampler *ArgValue) |
pi_result | piKernelGetInfo (pi_kernel Kernel, pi_kernel_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piextMemImageAllocate (pi_context Context, pi_device Device, pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_image_mem_handle *RetMem) |
API to allocate memory for bindless images. More... | |
pi_result | piextMemUnsampledImageCreate (pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_mem *RetMem, pi_image_handle *RetHandle) |
API to create bindless image handles. More... | |
pi_result | piextMemSampledImageCreate (pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, pi_mem *RetMem, pi_image_handle *RetHandle) |
API to create sampled bindless image handles. More... | |
pi_result | piextBindlessImageSamplerCreate (pi_context Context, const pi_sampler_properties *SamplerProperties, float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, pi_sampler *RetSampler) |
API to create samplers for bindless images. More... | |
pi_result | piextMemMipmapGetLevel (pi_context Context, pi_device Device, pi_image_mem_handle MipMem, unsigned int Level, pi_image_mem_handle *RetMem) |
API to retrieve individual image from mipmap. More... | |
pi_result | piextMemImageFree (pi_context Context, pi_device Device, pi_image_mem_handle MemoryHandle) |
API to free memory for bindless images. More... | |
pi_result | piextMemMipmapFree (pi_context Context, pi_device Device, pi_image_mem_handle MemoryHandle) |
API to free mipmap memory for bindless images. More... | |
pi_result | piextMemImageCopy (pi_queue Queue, void *DstPtr, void *SrcPtr, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, const pi_image_copy_flags Flags, pi_image_offset SrcOffset, pi_image_offset DstOffset, pi_image_region CopyExtent, pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
API to copy image data Host to Device or Device to Host. More... | |
pi_result | piextMemUnsampledImageHandleDestroy (pi_context Context, pi_device Device, pi_image_handle Handle) |
API to destroy bindless unsampled image handles. More... | |
pi_result | piextMemSampledImageHandleDestroy (pi_context Context, pi_device Device, pi_image_handle Handle) |
API to destroy bindless sampled image handles. More... | |
pi_result | piextMemImageGetInfo (pi_image_mem_handle MemHandle, pi_image_info ParamName, void *ParamValue, size_t *ParamValueSizeRet) |
API to query an image memory handle for specific properties. More... | |
pi_result | piextMemImportOpaqueFD (pi_context Context, pi_device Device, size_t Size, int FileDescriptor, pi_interop_mem_handle *RetHandle) |
API to import external memory in the form of a file descriptor. More... | |
pi_result | piextMemMapExternalArray (pi_context Context, pi_device Device, pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, pi_image_mem_handle *RetMem) |
API to map an interop memory handle to an image memory handle. More... | |
pi_result | piextMemReleaseInterop (pi_context Context, pi_device Device, pi_interop_mem_handle ExtMem) |
API to destroy interop memory. More... | |
pi_result | piextImportExternalSemaphoreOpaqueFD (pi_context Context, pi_device Device, int FileDescriptor, pi_interop_semaphore_handle *RetHandle) |
API to import an external semaphore in the form of a file descriptor. More... | |
pi_result | piextDestroyExternalSemaphore (pi_context Context, pi_device Device, pi_interop_semaphore_handle SemHandle) |
API to destroy the external semaphore handle. More... | |
pi_result | piextWaitExternalSemaphore (pi_queue Queue, pi_interop_semaphore_handle SemHandle, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
API to instruct the queue with a non-blocking wait on an external semaphore. More... | |
pi_result | piextSignalExternalSemaphore (pi_queue Queue, pi_interop_semaphore_handle SemHandle, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
API to instruct the queue to signal the external semaphore handle once all previous commands have completed execution. More... | |
pi_result | piKernelGetGroupInfo (pi_kernel Kernel, pi_device Device, pi_kernel_group_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piKernelGetSubGroupInfo (pi_kernel Kernel, pi_device Device, pi_kernel_sub_group_info ParamName, size_t InputValueSize, const void *InputValue, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
API to query information from the sub-group from a kernel. More... | |
pi_result | piKernelRetain (pi_kernel Kernel) |
pi_result | piKernelRelease (pi_kernel Kernel) |
pi_result | piEnqueueKernelLaunch (pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
pi_result | piextEnqueueCooperativeKernelLaunch (pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
pi_result | piextKernelCreateWithNativeHandle (pi_native_handle NativeHandle, pi_context Context, pi_program Program, bool OwnNativeHandle, pi_kernel *Kernel) |
Creates PI kernel object from a native handle. More... | |
pi_result | piextKernelGetNativeHandle (pi_kernel Kernel, pi_native_handle *NativeHandle) |
Gets the native handle of a PI kernel object. More... | |
pi_result | piextKernelSuggestMaxCooperativeGroupCount (pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, pi_uint32 *GroupCountRet) |
Gets the max work group count for a cooperative kernel. More... | |
pi_result | piEventCreate (pi_context Context, pi_event *RetEvent) |
Create PI event object in a signalled/completed state. More... | |
pi_result | piEventGetInfo (pi_event Event, pi_event_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piEventGetProfilingInfo (pi_event Event, pi_profiling_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piEventsWait (pi_uint32 NumEvents, const pi_event *EventList) |
pi_result | piEventSetCallback (pi_event Event, pi_int32 CommandExecCallbackType, void(*PFnNotify)(pi_event Event, pi_int32 EventCommandStatus, void *UserData), void *UserData) |
pi_result | piEventSetStatus (pi_event Event, pi_int32 ExecutionStatus) |
pi_result | piEventRetain (pi_event Event) |
pi_result | piEventRelease (pi_event Event) |
pi_result | piextEventGetNativeHandle (pi_event Event, pi_native_handle *NativeHandle) |
Gets the native handle of a PI event object. More... | |
pi_result | piextEventCreateWithNativeHandle (pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, pi_event *Event) |
Creates PI event object from a native handle. More... | |
pi_result | piSamplerCreate (pi_context Context, const pi_sampler_properties *SamplerProperties, pi_sampler *RetSampler) |
pi_result | piSamplerGetInfo (pi_sampler Sampler, pi_sampler_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piSamplerRetain (pi_sampler Sampler) |
pi_result | piSamplerRelease (pi_sampler Sampler) |
pi_result | piEnqueueEventsWait (pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
pi_result | piEnqueueEventsWaitWithBarrier (pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
pi_result | piEnqueueMemBufferRead (pi_queue Queue, pi_mem Src, pi_bool BlockingRead, size_t Offset, size_t Size, void *Dst, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferReadRect (pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferWrite (pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, size_t Offset, size_t Size, const void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferWriteRect (pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferCopy (pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferCopyRect (pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferFill (pi_queue Queue, pi_mem Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemBufferMap (pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, pi_map_flags MapFlags, size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent, void **RetMap) |
pi_result | piEnqueueMemUnmap (pi_queue Queue, pi_mem Mem, void *MappedPtr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
pi_result | piMemImageGetInfo (pi_mem Image, pi_image_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piEnqueueMemImageRead (pi_queue Queue, pi_mem Image, pi_bool BlockingRead, pi_image_offset Origin, pi_image_region Region, size_t RowPitch, size_t SlicePitch, void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemImageWrite (pi_queue Queue, pi_mem Image, pi_bool BlockingWrite, pi_image_offset Origin, pi_image_region Region, size_t InputRowPitch, size_t InputSlicePitch, const void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemImageCopy (pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, pi_image_offset SrcOrigin, pi_image_offset DstOrigin, pi_image_region Region, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piEnqueueMemImageFill (pi_queue Queue, pi_mem Image, const void *FillColor, const size_t *Origin, const size_t *Region, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
pi_result | piMemBufferPartition (pi_mem Buffer, pi_mem_flags Flags, pi_buffer_create_type BufferCreateType, void *BufferCreateInfo, pi_mem *RetMem) |
pi_result | piextGetDeviceFunctionPointer (pi_device Device, pi_program Program, const char *FunctionName, pi_uint64 *FunctionPointerRet) |
Retrieves a device function pointer to a user-defined function. More... | |
pi_result | piextGetGlobalVariablePointer (pi_device Device, pi_program Program, const char *GlobalVariableName, size_t *GlobalVariableSize, void **GlobalVariablePointerRet) |
pi_result | piextUSMDeviceAlloc (void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) |
Allocates device memory. More... | |
pi_result | piextUSMSharedAlloc (void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) |
Allocates memory accessible on both host and device. More... | |
pi_result | piextUSMPitchedAlloc (void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes) |
Allocates memory accessible on device. More... | |
pi_result | piextUSMHostAlloc (void **ResultPtr, pi_context Context, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) |
Allocates host memory accessible by the device. More... | |
pi_result | piextUSMFree (pi_context Context, void *Ptr) |
Indicates that the allocated USM memory is no longer needed on the runtime side. More... | |
pi_result | piextKernelSetArgPointer (pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, const void *ArgValue) |
Sets up pointer arguments for CL kernels. More... | |
pi_result | piextUSMEnqueueMemset (pi_queue Queue, void *Ptr, pi_int32 Value, size_t Count, pi_uint32 NumEventsInWaitlist, const pi_event *EventsWaitlist, pi_event *Event) |
USM Memset API. More... | |
pi_result | piextUSMEnqueueMemcpy (pi_queue Queue, pi_bool Blocking, void *DstPtr, const void *SrcPtr, size_t Size, pi_uint32 NumEventsInWaitlist, const pi_event *EventsWaitlist, pi_event *Event) |
USM Memcpy API. More... | |
pi_result | piextUSMEnqueuePrefetch (pi_queue Queue, const void *Ptr, size_t Size, pi_usm_migration_flags Flags, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *OutEvent) |
Hint to migrate memory to the device. More... | |
pi_result | piextUSMEnqueueMemAdvise (pi_queue Queue, const void *Ptr, size_t Length, pi_mem_advice Advice, pi_event *OutEvent) |
USM Memadvise API. More... | |
pi_result | piextUSMEnqueueFill2D (pi_queue Queue, void *Ptr, size_t Pitch, size_t PatternSize, const void *Pattern, size_t Width, size_t Height, pi_uint32 NumEventsWaitList, const pi_event *EventsWaitList, pi_event *Event) |
USM 2D fill API. More... | |
pi_result | piextUSMEnqueueMemset2D (pi_queue Queue, void *Ptr, size_t Pitch, int Value, size_t Width, size_t Height, pi_uint32 NumEventsWaitList, const pi_event *EventsWaitlist, pi_event *Event) |
USM 2D Memset API. More... | |
pi_result | piextUSMEnqueueMemcpy2D (pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, pi_uint32 NumEventsInWaitlist, const pi_event *EventWaitlist, pi_event *Event) |
USM 2D Memcpy API. More... | |
pi_result | piextUSMGetMemAllocInfo (pi_context Context, const void *Ptr, pi_mem_alloc_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
API to query information about USM-allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value; PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer falls inside an allocation. More... | |
pi_result | piextUSMImport (const void *HostPtr, size_t Size, pi_context Context) |
Import host system memory into USM. More... | |
pi_result | piextUSMRelease (const void *HostPtr, pi_context Context) |
Release host system memory from USM. More... | |
pi_result | piextEnqueueDeviceGlobalVariableWrite (pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, pi_event *Event) |
API for writing data from host to a device global variable. More... | |
pi_result | piextEnqueueDeviceGlobalVariableRead (pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, pi_event *Event) |
API reading data from a device global variable to host. More... | |
pi_result | piextEnqueueReadHostPipe (pi_queue Queue, pi_program Program, const char *PipeSymbol, pi_bool Blocking, void *Ptr, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, pi_event *Event) |
Read from pipe of a given name. More... | |
pi_result | piextEnqueueWriteHostPipe (pi_queue Queue, pi_program Program, const char *PipeSymbol, pi_bool Blocking, void *Ptr, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, pi_event *Event) |
Write to pipe of a given name. More... | |
pi_result | piKernelSetExecInfo (pi_kernel Kernel, pi_kernel_exec_info ParamName, size_t ParamValueSize, const void *ParamValue) |
API to set attributes controlling kernel execution. More... | |
pi_result | piextProgramSetSpecializationConstant (pi_program Prog, pi_uint32 SpecID, size_t Size, const void *SpecValue) |
Sets a specialization constant to a specific value. More... | |
pi_result | piextCommandBufferCreate (pi_context Context, pi_device Device, const pi_ext_command_buffer_desc *Desc, pi_ext_command_buffer *RetCommandBuffer) |
API to create a command-buffer. More... | |
pi_result | piextCommandBufferRetain (pi_ext_command_buffer CommandBuffer) |
API to increment the reference count of the command-buffer. More... | |
pi_result | piextCommandBufferRelease (pi_ext_command_buffer CommandBuffer) |
API to decrement the reference count of the command-buffer. More... | |
pi_result | piextCommandBufferFinalize (pi_ext_command_buffer CommandBuffer) |
API to stop command-buffer recording such that no more commands can be appended, and to make the command-buffer ready to enqueue on a command-queue. More... | |
pi_result | piextCommandBufferNDRangeKernel (pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, pi_ext_command_buffer_command *Command) |
API to append a kernel execution command to the command-buffer. More... | |
pi_result | piextCommandBufferMemcpyUSM (pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, size_t Size, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a USM memcpy command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferCopy (pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a mem buffer copy command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferCopyRect (pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a rectangular mem buffer copy command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferRead (pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a mem buffer read command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferReadRect (pi_ext_command_buffer CommandBuffer, pi_mem Buffer, pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a rectangular mem buffer read command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferWrite (pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a mem buffer write command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferWriteRect (pi_ext_command_buffer CommandBuffer, pi_mem Buffer, pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a rectangular mem buffer write command to the command-buffer. More... | |
pi_result | piextCommandBufferMemBufferFill (pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a mem buffer fill command to the command-buffer. More... | |
pi_result | piextCommandBufferFillUSM (pi_ext_command_buffer CommandBuffer, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a USM fill command to the command-buffer. More... | |
pi_result | piextCommandBufferPrefetchUSM (pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a USM Prefetch command to the command-buffer. More... | |
pi_result | piextCommandBufferAdviseUSM (pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) |
API to append a USM Advise command to the command-buffer. More... | |
pi_result | piextEnqueueCommandBuffer (pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) |
API to submit the command-buffer to a queue for execution. Returns an error if the command-buffer is not finalized or another instance of the same command-buffer is currently executing. More... | |
pi_result | piextCommandBufferUpdateKernelLaunch (pi_ext_command_buffer_command Command, pi_ext_command_buffer_update_kernel_launch_desc *Desc) |
API to update a kernel launch command inside of a command-buffer. More... | |
pi_result | piextCommandBufferRetainCommand (pi_ext_command_buffer_command Command) |
API to increment the reference count of a command-buffer command. More... | |
pi_result | piextCommandBufferReleaseCommand (pi_ext_command_buffer_command Command) |
API to decrement the reference count of a command-buffer command. More... | |
pi_result | piextPluginGetOpaqueData (void *opaque_data_param, void **opaque_data_return) |
API to get Plugin internal data, opaque to SYCL RT. More... | |
pi_result | piTearDown (void *PluginParameter) |
API to notify that the plugin should clean up its resources. More... | |
pi_result | piGetDeviceAndHostTimer (pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) |
Queries the device for its global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned. More... | |
pi_result | piextEnablePeerAccess (pi_device command_device, pi_device peer_device) |
pi_result | piextDisablePeerAccess (pi_device command_device, pi_device peer_device) |
pi_result | piextPeerAccessGetInfo (pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) |
pi_result | piPluginInit (pi_plugin *PluginInit) |
Variables | |
const char | SupportedVersion [] = _PI_CUDA_PLUGIN_VERSION_STRING |
Implementation of CUDA Plugin.
Definition in file pi_cuda.cpp.
#define _PI_API | ( | api | ) | (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); |
cuda_tracing_context_t_* createCUDATracingContext | ( | ) |
Referenced by piPluginInit().
void disableCUDATracing | ( | cuda_tracing_context_t_ * | ctx | ) |
void enableCUDATracing | ( | cuda_tracing_context_t_ * | ctx | ) |
Referenced by piPluginInit().
void freeCUDATracingContext | ( | cuda_tracing_context_t_ * | Ctx | ) |
pi_result piContextCreate | ( | const pi_context_properties * | Properties, |
pi_uint32 | NumDevices, | ||
const pi_device * | Devices, | ||
void(*)(const char *ErrInfo, const void *PrivateInfo, size_t CB, void *UserData) | PFnNotify, | ||
void * | UserData, | ||
pi_context * | RetContext | ||
) |
Definition at line 114 of file pi_cuda.cpp.
References pi2ur::piContextCreate().
pi_result piContextGetInfo | ( | pi_context | Context, |
pi_context_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 124 of file pi_cuda.cpp.
pi_result piContextRelease | ( | pi_context | Context | ) |
Definition at line 157 of file pi_cuda.cpp.
pi_result piContextRetain | ( | pi_context | Context | ) |
Definition at line 152 of file pi_cuda.cpp.
pi_result piDeviceGetInfo | ( | pi_device | Device, |
pi_device_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Returns requested info for provided native device. Returns PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for a PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts.
Definition at line 78 of file pi_cuda.cpp.
pi_result piDevicePartition | ( | pi_device | Device, |
const pi_device_partition_property * | Properties, | ||
pi_uint32 | NumDevices, | ||
pi_device * | OutDevices, | ||
pi_uint32 * | OutNumDevices | ||
) |
Definition at line 85 of file pi_cuda.cpp.
Definition at line 74 of file pi_cuda.cpp.
Definition at line 70 of file pi_cuda.cpp.
pi_result piDevicesGet | ( | pi_platform | Platform, |
pi_device_type | DeviceType, | ||
pi_uint32 | NumEntries, | ||
pi_device * | Devices, | ||
pi_uint32 * | NumDevices | ||
) |
Definition at line 63 of file pi_cuda.cpp.
pi_result piEnqueueEventsWait | ( | pi_queue | Queue, |
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent | ||
) |
Definition at line 657 of file pi_cuda.cpp.
pi_result piEnqueueEventsWaitWithBarrier | ( | pi_queue | Queue, |
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent | ||
) |
Definition at line 665 of file pi_cuda.cpp.
pi_result piEnqueueKernelLaunch | ( | pi_queue | Queue, |
pi_kernel | Kernel, | ||
pi_uint32 | WorkDim, | ||
const size_t * | GlobalWorkOffset, | ||
const size_t * | GlobalWorkSize, | ||
const size_t * | LocalWorkSize, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent | ||
) |
Definition at line 537 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferCopy | ( | pi_queue | Queue, |
pi_mem | SrcMem, | ||
pi_mem | DstMem, | ||
size_t | SrcOffset, | ||
size_t | DstOffset, | ||
size_t | Size, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 726 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferCopyRect | ( | pi_queue | Queue, |
pi_mem | SrcMem, | ||
pi_mem | DstMem, | ||
pi_buff_rect_offset | SrcOrigin, | ||
pi_buff_rect_offset | DstOrigin, | ||
pi_buff_rect_region | Region, | ||
size_t | SrcRowPitch, | ||
size_t | SrcSlicePitch, | ||
size_t | DstRowPitch, | ||
size_t | DstSlicePitch, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 737 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferFill | ( | pi_queue | Queue, |
pi_mem | Buffer, | ||
const void * | Pattern, | ||
size_t | PatternSize, | ||
size_t | Offset, | ||
size_t | Size, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 750 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferMap | ( | pi_queue | Queue, |
pi_mem | Mem, | ||
pi_bool | BlockingMap, | ||
pi_map_flags | MapFlags, | ||
size_t | Offset, | ||
size_t | Size, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent, | ||
void ** | RetMap | ||
) |
Definition at line 762 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferRead | ( | pi_queue | Queue, |
pi_mem | Src, | ||
pi_bool | BlockingRead, | ||
size_t | Offset, | ||
size_t | Size, | ||
void * | Dst, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 674 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferReadRect | ( | pi_queue | Queue, |
pi_mem | Buffer, | ||
pi_bool | BlockingRead, | ||
pi_buff_rect_offset | BufferOffset, | ||
pi_buff_rect_offset | HostOffset, | ||
pi_buff_rect_region | Region, | ||
size_t | BufferRowPitch, | ||
size_t | BufferSlicePitch, | ||
size_t | HostRowPitch, | ||
size_t | HostSlicePitch, | ||
void * | Ptr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 686 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferWrite | ( | pi_queue | Queue, |
pi_mem | Buffer, | ||
pi_bool | BlockingWrite, | ||
size_t | Offset, | ||
size_t | Size, | ||
const void * | Ptr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 700 of file pi_cuda.cpp.
pi_result piEnqueueMemBufferWriteRect | ( | pi_queue | Queue, |
pi_mem | Buffer, | ||
pi_bool | BlockingWrite, | ||
pi_buff_rect_offset | BufferOffset, | ||
pi_buff_rect_offset | HostOffset, | ||
pi_buff_rect_region | Region, | ||
size_t | BufferRowPitch, | ||
size_t | BufferSlicePitch, | ||
size_t | HostRowPitch, | ||
size_t | HostSlicePitch, | ||
const void * | Ptr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 712 of file pi_cuda.cpp.
pi_result piEnqueueMemImageCopy | ( | pi_queue | Queue, |
pi_mem | SrcImage, | ||
pi_mem | DstImage, | ||
pi_image_offset | SrcOrigin, | ||
pi_image_offset | DstOrigin, | ||
pi_image_region | Region, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 815 of file pi_cuda.cpp.
pi_result piEnqueueMemImageFill | ( | pi_queue | Queue, |
pi_mem | Image, | ||
const void * | FillColor, | ||
const size_t * | Origin, | ||
const size_t * | Region, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 824 of file pi_cuda.cpp.
pi_result piEnqueueMemImageRead | ( | pi_queue | Queue, |
pi_mem | Image, | ||
pi_bool | BlockingRead, | ||
pi_image_offset | Origin, | ||
pi_image_region | Region, | ||
size_t | RowPitch, | ||
size_t | SlicePitch, | ||
void * | Ptr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 789 of file pi_cuda.cpp.
pi_result piEnqueueMemImageWrite | ( | pi_queue | Queue, |
pi_mem | Image, | ||
pi_bool | BlockingWrite, | ||
pi_image_offset | Origin, | ||
pi_image_region | Region, | ||
size_t | InputRowPitch, | ||
size_t | InputSlicePitch, | ||
const void * | Ptr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | Event | ||
) |
Definition at line 801 of file pi_cuda.cpp.
pi_result piEnqueueMemUnmap | ( | pi_queue | Queue, |
pi_mem | Mem, | ||
void * | MappedPtr, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent | ||
) |
Definition at line 773 of file pi_cuda.cpp.
pi_result piEventCreate | ( | pi_context | context, |
pi_event * | ret_event | ||
) |
Create PI event object in a signalled/completed state.
context | is the PI context of the event. |
ret_event | is the PI event created. |
Definition at line 579 of file pi_cuda.cpp.
pi_result piEventGetInfo | ( | pi_event | Event, |
pi_event_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 583 of file pi_cuda.cpp.
pi_result piEventGetProfilingInfo | ( | pi_event | Event, |
pi_profiling_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 590 of file pi_cuda.cpp.
Definition at line 617 of file pi_cuda.cpp.
Definition at line 615 of file pi_cuda.cpp.
pi_result piEventSetCallback | ( | pi_event | Event, |
pi_int32 | CommandExecCallbackType, | ||
void(*)(pi_event Event, pi_int32 EventCommandStatus, void *UserData) | PFnNotify, | ||
void * | UserData | ||
) |
Definition at line 602 of file pi_cuda.cpp.
References pi2ur::piEventSetCallback().
Definition at line 611 of file pi_cuda.cpp.
Definition at line 598 of file pi_cuda.cpp.
pi_result piextBindlessImageSamplerCreate | ( | pi_context | context, |
const pi_sampler_properties * | sampler_properties, | ||
float | min_mipmap_level_clamp, | ||
float | max_mipmap_level_clamp, | ||
float | max_anisotropy, | ||
pi_sampler * | result_sampler | ||
) |
API to create samplers for bindless images.
context | is the pi_context |
device | is the pi_device |
sampler_properties | is the pointer to the sampler properties bitfield |
min_mipmap_level_clamp | is the minimum mipmap level to sample from |
max_mipmap_level_clamp | is the maximum mipmap level to sample from |
max_anisotropy | is the maximum anisotropic ratio |
result_sampler | is the returned sampler |
Definition at line 400 of file pi_cuda.cpp.
pi_result piextCommandBufferAdviseUSM | ( | pi_ext_command_buffer | command_buffer, |
const void * | ptr, | ||
size_t | length, | ||
pi_mem_advice | advice, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a USM Advise command to the command-buffer.
command_buffer | The command-buffer to append onto. |
ptr | is the data to be advised. |
length | is the size in bytes of the memory to advise. |
advice | is device specific advice. |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1204 of file pi_cuda.cpp.
pi_result piextCommandBufferCreate | ( | pi_context | context, |
pi_device | device, | ||
const pi_ext_command_buffer_desc * | desc, | ||
pi_ext_command_buffer * | ret_command_buffer | ||
) |
API to create a command-buffer.
context | The context to associate the command-buffer with. |
device | The device to associate the command-buffer with. |
desc | Descriptor for the new command-buffer. |
ret_command_buffer | Pointer to fill with the address of the new command-buffer. |
Definition at line 1068 of file pi_cuda.cpp.
pi_result piextCommandBufferFillUSM | ( | pi_ext_command_buffer | command_buffer, |
void * | ptr, | ||
const void * | pattern, | ||
size_t | pattern_size, | ||
size_t | size, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a USM fill command to the command-buffer.
command_buffer | The command-buffer to append onto. |
ptr | pointer to the USM allocation to fill. |
pattern | pointer to the pattern to fill ptr with. |
pattern_size | size of the pattern in bytes. |
size | fill size in bytes. |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1184 of file pi_cuda.cpp.
pi_result piextCommandBufferFinalize | ( | pi_ext_command_buffer | command_buffer | ) |
API to stop command-buffer recording such that no more commands can be appended, and makes the command-buffer ready to enqueue on a command-queue.
command_buffer | The command_buffer to finalize. |
Definition at line 1083 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferCopy | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | src_buffer, | ||
pi_mem | dst_buffer, | ||
size_t | src_offset, | ||
size_t | dst_offset, | ||
size_t | size, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a mem buffer copy command to the command-buffer.
command_buffer | The command-buffer to append onto. |
src_buffer | is the data to be copied |
dst_buffer | is the location the data will be copied |
src_offset | offset into src_buffer |
dst_offset | offset into dst_buffer |
size | is number of bytes to copy |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1108 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferCopyRect | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | src_buffer, | ||
pi_mem | dst_buffer, | ||
pi_buff_rect_offset | src_origin, | ||
pi_buff_rect_offset | dst_origin, | ||
pi_buff_rect_region | region, | ||
size_t | src_row_pitch, | ||
size_t | src_slice_pitch, | ||
size_t | dst_row_pitch, | ||
size_t | dst_slice_pitch, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a rectangular mem buffer copy command to the command-buffer.
command_buffer | The command-buffer to append onto. |
src_buffer | is the data to be copied |
dst_buffer | is the location the data will be copied |
src_origin | offset for the start of the region to copy in src_buffer |
dst_origin | offset for the start of the region to copy in dst_buffer |
region | The size of the region to be copied |
src_row_pitch | Row pitch for the src data |
src_slice_pitch | Slice pitch for the src data |
dst_row_pitch | Row pitch for the dst data |
dst_slice_pitch | Slice pitch for the dst data |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1118 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferFill | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | buffer, | ||
const void * | pattern, | ||
size_t | pattern_size, | ||
size_t | offset, | ||
size_t | size, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a mem buffer fill command to the command-buffer.
command_buffer | The command-buffer to append onto. |
buffer | is the location to fill the data. |
pattern | pointer to the pattern to fill the buffer with. |
pattern_size | size of the pattern in bytes. |
offset | Offset into the buffer to fill from. |
size | fill size in bytes. |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1174 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferRead | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | buffer, | ||
size_t | offset, | ||
size_t | size, | ||
void * | dst, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a mem buffer read command to the command-buffer.
command_buffer | The command-buffer to append onto. |
buffer | is the data to be read |
offset | offset into buffer |
size | is number of bytes to read |
dst | is the pointer to the destination |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1130 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferReadRect | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | buffer, | ||
pi_buff_rect_offset | buffer_offset, | ||
pi_buff_rect_offset | host_offset, | ||
pi_buff_rect_region | region, | ||
size_t | buffer_row_pitch, | ||
size_t | buffer_slice_pitch, | ||
size_t | host_row_pitch, | ||
size_t | host_slice_pitch, | ||
void * | ptr, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a rectangular mem buffer read command to the command-buffer.
command_buffer | The command-buffer to append onto. |
buffer | is the data to be read |
buffer_offset | offset for the start of the region to read in buffer |
host_offset | offset for the start of the region to be written from ptr |
region | The size of the region to read |
buffer_row_pitch | Row pitch for the source buffer data |
buffer_slice_pitch | Slice pitch for the source buffer data |
host_row_pitch | Row pitch for the destination data ptr |
host_slice_pitch | Slice pitch for the destination data ptr |
ptr | is the location the data will be written |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1139 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferWrite | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | buffer, | ||
size_t | offset, | ||
size_t | size, | ||
const void * | ptr, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a mem buffer write command to the command-buffer.
command_buffer | The command-buffer to append onto. |
buffer | is the location to write the data |
offset | offset into buffer |
size | is number of bytes to write |
ptr | is the pointer to the source |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1152 of file pi_cuda.cpp.
pi_result piextCommandBufferMemBufferWriteRect | ( | pi_ext_command_buffer | command_buffer, |
pi_mem | buffer, | ||
pi_buff_rect_offset | buffer_offset, | ||
pi_buff_rect_offset | host_offset, | ||
pi_buff_rect_region | region, | ||
size_t | buffer_row_pitch, | ||
size_t | buffer_slice_pitch, | ||
size_t | host_row_pitch, | ||
size_t | host_slice_pitch, | ||
const void * | ptr, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a rectangular mem buffer write command to the command-buffer.
command_buffer | The command-buffer to append onto. |
buffer | is the location to write the data |
buffer_offset | offset for the start of the region to write in buffer |
host_offset | offset for the start of the region to be read from ptr |
region | The size of the region to write |
buffer_row_pitch | Row pitch for the buffer data |
buffer_slice_pitch | Slice pitch for the buffer data |
host_row_pitch | Row pitch for the source data ptr |
host_slice_pitch | Slice pitch for the source data ptr |
ptr | is the pointer to the source |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1161 of file pi_cuda.cpp.
pi_result piextCommandBufferMemcpyUSM | ( | pi_ext_command_buffer | command_buffer, |
void * | dst_ptr, | ||
const void * | src_ptr, | ||
size_t | size, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a USM memcpy command to the command-buffer.
command_buffer | The command-buffer to append onto. |
dst_ptr | is the location the data will be copied |
src_ptr | is the data to be copied |
size | is number of bytes to copy |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1099 of file pi_cuda.cpp.
pi_result piextCommandBufferNDRangeKernel | ( | pi_ext_command_buffer | command_buffer, |
pi_kernel | kernel, | ||
pi_uint32 | work_dim, | ||
const size_t * | global_work_offset, | ||
const size_t * | global_work_size, | ||
const size_t * | local_work_size, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point, | ||
pi_ext_command_buffer_command * | command | ||
) |
API to append a kernel execution command to the command-buffer.
command_buffer | The command-buffer to append onto. |
kernel | The kernel to append. |
work_dim | Dimension of the kernel execution. |
global_work_offset | Offset to use when executing kernel. |
global_work_size | Global work size to use when executing kernel. |
local_work_size | Local work size to use when executing kernel. |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this kernel execution. |
command | Return pointer to the command representing this kernel execution. |
Definition at line 1087 of file pi_cuda.cpp.
pi_result piextCommandBufferPrefetchUSM | ( | pi_ext_command_buffer | command_buffer, |
const void * | ptr, | ||
size_t | size, | ||
pi_usm_migration_flags | flags, | ||
pi_uint32 | num_sync_points_in_wait_list, | ||
const pi_ext_sync_point * | sync_point_wait_list, | ||
pi_ext_sync_point * | sync_point | ||
) |
API to append a USM Prefetch command to the command-buffer.
command_buffer | The command-buffer to append onto. |
ptr | points to the memory to migrate. |
size | is the number of bytes to migrate. |
flags | is a bitfield used to specify memory migration options. |
num_sync_points_in_wait_list | The number of sync points in the provided wait list. |
sync_point_wait_list | A list of sync points that this command must wait on. |
sync_point | The sync_point associated with this memory operation. |
Definition at line 1195 of file pi_cuda.cpp.
pi_result piextCommandBufferRelease | ( | pi_ext_command_buffer | command_buffer | ) |
API to decrement the reference count of the command-buffer.
After the command_buffer reference count becomes zero and has finished execution, the command-buffer is deleted.
command_buffer | The command_buffer to release. |
Definition at line 1079 of file pi_cuda.cpp.
pi_result piextCommandBufferReleaseCommand | ( | pi_ext_command_buffer_command | command | ) |
API to decrement the reference count of a command-buffer command.
After the command reference count becomes zero, the command is deleted.
command | The command to release. |
Definition at line 1234 of file pi_cuda.cpp.
pi_result piextCommandBufferRetain | ( | pi_ext_command_buffer | command_buffer | ) |
API to increment the reference count of the command-buffer.
command_buffer | The command_buffer to retain. |
Definition at line 1075 of file pi_cuda.cpp.
pi_result piextCommandBufferRetainCommand | ( | pi_ext_command_buffer_command | command | ) |
API to increment the reference count of a command-buffer command.
command | The command to retain. |
Definition at line 1229 of file pi_cuda.cpp.
pi_result piextCommandBufferUpdateKernelLaunch | ( | pi_ext_command_buffer_command | command, |
pi_ext_command_buffer_update_kernel_launch_desc * | desc | ||
) |
API to update a kernel launch command inside of a command-buffer.
command | The command to be updated. |
desc | Descriptor which describes the updated parameters of the kernel launch. |
Definition at line 1222 of file pi_cuda.cpp.
pi_result piextContextCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_uint32 | numDevices, | ||
const pi_device * | devices, | ||
bool | pluginOwnsNativeHandle, | ||
pi_context * | context | ||
) |
Creates PI context object from a native handle.
NOTE: The created PI object takes ownership of the native handle. NOTE: The number of devices and the list of devices are needed for the Level Zero backend because there is no possibility to query this information from the context handle for Level Zero. If the backend has an API to query a list of devices from the context native handle then these parameters are ignored.
nativeHandle | is the native handle to create PI context from. |
numDevices | is the number of devices in the context. Parameter is ignored if number of devices can be queried from the context native handle for a backend. |
devices | is the list of devices in the context. Parameter is ignored if devices can be queried from the context native handle for a backend. |
pluginOwnsNativeHandle | Indicates whether the created PI object should take ownership of the native handle. |
context | is the PI context created from the native handle. |
Definition at line 143 of file pi_cuda.cpp.
pi_result piextContextGetNativeHandle | ( | pi_context | context, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI context object.
context | is the PI context to get the native handle of. |
nativeHandle | is the native handle of context. |
Definition at line 138 of file pi_cuda.cpp.
pi_result piextContextSetExtendedDeleter | ( | pi_context | Context, |
pi_context_extended_deleter | Function, | ||
void * | UserData | ||
) |
Definition at line 132 of file pi_cuda.cpp.
pi_result piextDestroyExternalSemaphore | ( | pi_context | context, |
pi_device | device, | ||
pi_interop_semaphore_handle | sem_handle | ||
) |
API to destroy the external semaphore handle.
context | is the pi_context |
device | is the pi_device |
sem_handle | is the interop semaphore handle to the external semaphore to be destroyed |
Definition at line 487 of file pi_cuda.cpp.
pi_result piextDeviceCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_platform | platform, | ||
pi_device * | device | ||
) |
Creates PI device object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI device from. |
platform | is the platform of the device (optional). |
device | is the PI device created from the native handle. |
Definition at line 106 of file pi_cuda.cpp.
pi_result piextDeviceGetNativeHandle | ( | pi_device | device, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI device object.
device | is the PI device to get the native handle of. |
nativeHandle | is the native handle of device. |
Definition at line 100 of file pi_cuda.cpp.
pi_result piextDeviceSelectBinary | ( | pi_device | Device, |
pi_device_binary * | Binaries, | ||
pi_uint32 | NumBinaries, | ||
pi_uint32 * | SelectedBinaryInd | ||
) |
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition at line 93 of file pi_cuda.cpp.
Definition at line 1258 of file pi_cuda.cpp.
Definition at line 1252 of file pi_cuda.cpp.
pi_result piextEnqueueCommandBuffer | ( | pi_ext_command_buffer | command_buffer, |
pi_queue | queue, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
API to submit the command-buffer to queue for execution, returns an error if the command-buffer is not finalized or another instance of the same command-buffer is currently executing.
command_buffer | The command-buffer to be submitted. |
queue | The PI queue to submit on. |
num_events_in_wait_list | The number of events that this execution depends on. |
event_wait_list | List of pi_events to wait on. |
event | The pi_event associated with this enqueue. |
Definition at line 1213 of file pi_cuda.cpp.
pi_result piextEnqueueCooperativeKernelLaunch | ( | pi_queue | Queue, |
pi_kernel | Kernel, | ||
pi_uint32 | WorkDim, | ||
const size_t * | GlobalWorkOffset, | ||
const size_t * | GlobalWorkSize, | ||
const size_t * | LocalWorkSize, | ||
pi_uint32 | NumEventsInWaitList, | ||
const pi_event * | EventWaitList, | ||
pi_event * | OutEvent | ||
) |
Definition at line 547 of file pi_cuda.cpp.
pi_result piextEnqueueDeviceGlobalVariableRead | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | name, | ||
pi_bool | blocking_read, | ||
size_t | count, | ||
size_t | offset, | ||
void * | dst, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
API reading data from a device global variable to host.
queue | is the queue |
program | is the program containing the device global variable |
blocking_read | is true if the read should block |
name | is the unique identifier for the device global variable |
count | is the number of bytes to copy |
offset | is the byte offset into the device global variable to start copying |
dst | is a pointer to where the data must be copied to |
num_events_in_wait_list | is a number of events in the wait list |
event_wait_list | is the wait list |
event | is the resulting event |
Definition at line 1002 of file pi_cuda.cpp.
pi_result piextEnqueueDeviceGlobalVariableWrite | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | name, | ||
pi_bool | blocking_write, | ||
size_t | count, | ||
size_t | offset, | ||
const void * | src, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
Device global variable.
API for writing data from host to a device global variable.
queue | is the queue |
program | is the program containing the device global variable |
blocking_write | is true if the write should block |
name | is the unique identifier for the device global variable |
count | is the number of bytes to copy |
offset | is the byte offset into the device global variable to start copying |
src | is a pointer to where the data must be copied from |
num_events_in_wait_list | is a number of events in the wait list |
event_wait_list | is the wait list |
event | is the resulting event |
Definition at line 993 of file pi_cuda.cpp.
pi_result piextEnqueueReadHostPipe | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | pipe_symbol, | ||
pi_bool | blocking, | ||
void * | ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Plugin.
Read from pipe of a given name
queue | a valid host command-queue in which the read / write command will be queued. command_queue and program must be created with the same OpenCL context. |
program | a program object with a successfully built executable. |
pipe_symbol | the name of the program scope pipe global variable. |
blocking | indicates whether the read and write operations are blocking or non-blocking |
ptr | a pointer to buffer in host memory that will hold resulting data from pipe |
size | size of the memory region to read or write, in bytes. |
num_events_in_waitlist | number of events in the wait list. |
events_waitlist | specifies events that need to complete before this particular command can be executed. |
event | returns an event object that identifies this read / write command and can be used to query or queue a wait for this command to complete. |
Definition at line 1013 of file pi_cuda.cpp.
pi_result piextEnqueueWriteHostPipe | ( | pi_queue | queue, |
pi_program | program, | ||
const char * | pipe_symbol, | ||
pi_bool | blocking, | ||
void * | ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Write to pipe of a given name.
queue | a valid host command-queue in which the read / write command will be queued. command_queue and program must be created with the same OpenCL context. |
program | a program object with a successfully built executable. |
pipe_symbol | the name of the program scope pipe global variable. |
blocking | indicate if the read and write operations are blocking or non-blocking |
ptr | a pointer to buffer in host memory that holds data to be written to host pipe. |
size | size of the memory region to read or write, in bytes. |
num_events_in_waitlist | number of events in the wait list. |
events_waitlist | specify events that need to complete before this particular command can be executed. |
event | returns an event object that identifies this read / write command and can be used to query or queue a wait for this command to complete. |
Definition at line 1033 of file pi_cuda.cpp.
pi_result piextEventCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
bool | ownNativeHandle, | ||
pi_event * | event | ||
) |
Creates PI event object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI event from. |
context | is the corresponding PI context |
ownNativeHandle | Indicates whether the created PI object should take ownership of the native handle. |
event | is the PI event created from the native handle. |
Definition at line 627 of file pi_cuda.cpp.
pi_result piextEventGetNativeHandle | ( | pi_event | event, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI event object.
event | is the PI event to get the native handle of. |
nativeHandle | is the native handle of event. |
Definition at line 621 of file pi_cuda.cpp.
pi_result piextGetDeviceFunctionPointer | ( | pi_device | device, |
pi_program | program, | ||
const char * | function_name, | ||
pi_uint64 * | function_pointer_ret | ||
) |
Retrieves a device function pointer to a user-defined function function_name. function_pointer_ret is set to 0 if the query failed. program must be built before calling this API. device must be present in the list of devices returned by the get_device method for program.
If a fallback method determines the function exists but the address is not available, PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE is returned. If the address does not exist, PI_ERROR_INVALID_KERNEL_NAME is returned.
Definition at line 844 of file pi_cuda.cpp.
pi_result piextGetGlobalVariablePointer | ( | pi_device | Device, |
pi_program | Program, | ||
const char * | GlobalVariableName, | ||
size_t * | GlobalVariableSize, | ||
void ** | GlobalVariablePointerRet | ||
) |
Definition at line 851 of file pi_cuda.cpp.
pi_result piextImportExternalSemaphoreOpaqueFD | ( | pi_context | context, |
pi_device | device, | ||
int | file_descriptor, | ||
pi_interop_semaphore_handle * | ret_handle | ||
) |
API to import an external semaphore in the form of a file descriptor.
context | is the pi_context |
device | is the pi_device |
file_descriptor | is the file descriptor |
ret_handle | is the returned interop semaphore handle to the external semaphore |
Definition at line 479 of file pi_cuda.cpp.
pi_result piextKernelCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
pi_program | program, | ||
bool | pluginOwnsNativeHandle, | ||
pi_kernel * | kernel | ||
) |
Creates PI kernel object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI kernel from. |
context | is the PI context of the kernel. |
program | is the PI program of the kernel. |
pluginOwnsNativeHandle | Indicates whether the created PI object should take ownership of the native handle. |
kernel | is the PI kernel created from the native handle. |
Definition at line 557 of file pi_cuda.cpp.
pi_result piextKernelGetNativeHandle | ( | pi_kernel | kernel, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI kernel object.
kernel | is the PI kernel to get the native handle of. |
nativeHandle | is the native handle of kernel. |
Definition at line 567 of file pi_cuda.cpp.
pi_result piextKernelSetArgMemObj | ( | pi_kernel | Kernel, |
pi_uint32 | ArgIndex, | ||
const pi_mem_obj_property * | ArgProperties, | ||
const pi_mem * | ArgValue | ||
) |
Definition at line 353 of file pi_cuda.cpp.
pi_result piextKernelSetArgPointer | ( | pi_kernel | kernel, |
pi_uint32 | arg_index, | ||
size_t | arg_size, | ||
const void * | arg_value | ||
) |
Sets up pointer arguments for CL kernels.
An extra indirection is required due to CL argument conventions.
kernel | is the kernel to be launched |
arg_index | is the index of the kernel argument |
arg_size | is the size in bytes of the argument (ignored in CL) |
arg_value | is the pointer argument |
Definition at line 900 of file pi_cuda.cpp.
pi_result piextKernelSetArgSampler | ( | pi_kernel | Kernel, |
pi_uint32 | ArgIndex, | ||
const pi_sampler * | ArgValue | ||
) |
Definition at line 360 of file pi_cuda.cpp.
pi_result piextKernelSuggestMaxCooperativeGroupCount | ( | pi_kernel | kernel, |
size_t | local_work_size, | ||
size_t | dynamic_shared_memory_size, | ||
pi_uint32 * | group_count_ret | ||
) |
Gets the max work group count for a cooperative kernel.
kernel | is the PI kernel being queried. |
local_work_size | is the number of work items in a work group that will be used when the kernel is launched. |
dynamic_shared_memory_size | is the size of dynamic shared memory, for each work group, in bytes, that will be used when the kernel is launched. |
group_count_ret | is a pointer to where the query result will be stored. |
Definition at line 572 of file pi_cuda.cpp.
pi_result piextMemCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
bool | ownNativeHandle, | ||
pi_mem * | mem | ||
) |
Creates PI mem object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI mem from. |
context | The PI context of the memory allocation. |
ownNativeHandle | Indicates if we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT. |
mem | is the PI mem created from the native handle. |
Definition at line 241 of file pi_cuda.cpp.
pi_result piextMemGetNativeHandle | ( | pi_mem | mem, |
pi_device | dev, | ||
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI mem object.
mem | is the PI mem to get the native handle of. |
dev | is the PI device that the native allocation will be resident on |
nativeHandle | is the native handle of mem. |
Definition at line 236 of file pi_cuda.cpp.
pi_result piextMemImageAllocate | ( | pi_context | context, |
pi_device | device, | ||
pi_image_format * | image_format, | ||
pi_image_desc * | image_desc, | ||
pi_image_mem_handle * | ret_mem | ||
) |
API to allocate memory for bindless images.
context | is the pi_context |
device | is the pi_device |
flags | are extra flags to pass (currently unused) |
image_format | format of the image (channel order and data type) |
image_desc | image descriptor |
ret_mem | is the returning memory handle to newly allocated memory |
Definition at line 374 of file pi_cuda.cpp.
pi_result piextMemImageCopy | ( | pi_queue | command_queue, |
void * | dst_ptr, | ||
void * | src_ptr, | ||
const pi_image_format * | image_format, | ||
const pi_image_desc * | image_desc, | ||
const pi_image_copy_flags | flags, | ||
pi_image_offset | src_offset, | ||
pi_image_offset | dst_offset, | ||
pi_image_region | copy_extent, | ||
pi_image_region | host_extent, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
API to copy image data Host to Device or Device to Host.
command_queue | is the queue to submit to |
dst_ptr | is the location the data will be copied to |
src_ptr | is the data to be copied |
image_format | format of the image (channel order and data type) |
image_desc | image descriptor |
flags | flags describing copy direction (H2D or D2H) |
src_offset | is the offset into the source image/memory |
dst_offset | is the offset into the destination image/memory |
copy_extent | is the extent (region) of the image/memory to copy |
host_extent | is the extent (region) of the memory on the host |
num_events_in_wait_list | is the number of events in the wait list |
event_wait_list | is the list of events to wait on before copying |
event | is the returned event representing this operation |
Definition at line 427 of file pi_cuda.cpp.
pi_result piextMemImageCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
bool | ownNativeHandle, | ||
const pi_image_format * | ImageFormat, | ||
const pi_image_desc * | ImageDesc, | ||
pi_mem * | img | ||
) |
Creates PI image object from a native handle.
nativeHandle | is the native handle to create PI image from. |
context | The PI context of the memory allocation. |
ownNativeHandle | Indicates if we own the native memory handle or it came from interop that asked to not transfer the ownership to SYCL RT. |
ImageFormat | is the pi_image_format struct that specifies the image channel order and channel data type that match what the nativeHandle uses |
ImageDesc | is the pi_image_desc struct that specifies the image dimension, pitch, slice and other information about the nativeHandle |
img | is the PI img created from the native handle. |
Definition at line 264 of file pi_cuda.cpp.
pi_result piextMemImageFree | ( | pi_context | context, |
pi_device | device, | ||
pi_image_mem_handle | memory_handle | ||
) |
API to free memory for bindless images.
context | is the pi_context |
device | is the pi_device |
memory_handle | is the handle to image memory to be freed |
Definition at line 417 of file pi_cuda.cpp.
pi_result piextMemImageGetInfo | ( | const pi_image_mem_handle | mem_handle, |
pi_image_info | param_name, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
API to query an image memory handle for specific properties.
mem_handle | is the handle to the image memory |
param_name | is the queried info name |
param_value | is the returned query value |
param_value_size_ret | is the returned query value size |
Definition at line 450 of file pi_cuda.cpp.
pi_result piextMemImportOpaqueFD | ( | pi_context | context, |
pi_device | device, | ||
size_t | size, | ||
int | file_descriptor, | ||
pi_interop_mem_handle * | ret_handle | ||
) |
API to import external memory in the form of a file descriptor.
context | is the pi_context |
device | is the pi_device |
size | is the size of the external memory |
file_descriptor | is the file descriptor |
ret_handle | is the returned interop memory handle to the external memory |
Definition at line 459 of file pi_cuda.cpp.
pi_result piextMemMapExternalArray | ( | pi_context | context, |
pi_device | device, | ||
pi_image_format * | image_format, | ||
pi_image_desc * | image_desc, | ||
pi_interop_mem_handle | mem_handle, | ||
pi_image_mem_handle * | ret_mem | ||
) |
API to map an interop memory handle to an image memory handle.
context | is the pi_context |
device | is the pi_device |
image_format | format of the image (channel order and data type) |
image_desc | image descriptor |
mem_handle | is the interop memory handle to the external memory |
ret_mem | is the returned image memory handle to the externally allocated memory |
Definition at line 465 of file pi_cuda.cpp.
pi_result piextMemMipmapFree | ( | pi_context | context, |
pi_device | device, | ||
pi_image_mem_handle | memory_handle | ||
) |
API to free mipmap memory for bindless images.
context | is the pi_context |
device | is the pi_device |
memory_handle | is the handle to image memory to be freed |
Definition at line 422 of file pi_cuda.cpp.
pi_result piextMemMipmapGetLevel | ( | pi_context | context, |
pi_device | device, | ||
pi_image_mem_handle | mip_mem, | ||
unsigned int | level, | ||
pi_image_mem_handle * | ret_mem | ||
) |
API to retrieve individual image from mipmap.
context | is the pi_context |
device | is the pi_device |
mip_mem | is the memory handle to the mipmap |
level | is the requested level of the mipmap |
ret_mem | is the returning memory handle to the individual image |
Definition at line 409 of file pi_cuda.cpp.
pi_result piextMemReleaseInterop | ( | pi_context | context, |
pi_device | device, | ||
pi_interop_mem_handle | memory_handle | ||
) |
API to destroy interop memory.
context | is the pi_context |
device | is the pi_device |
memory_handle | is the handle to interop memory to be freed |
Definition at line 473 of file pi_cuda.cpp.
pi_result piextMemSampledImageCreate | ( | pi_context | context, |
pi_device | device, | ||
pi_image_mem_handle | img_mem, | ||
pi_image_format * | image_format, | ||
pi_image_desc * | image_desc, | ||
pi_sampler | sampler, | ||
pi_mem * | ret_mem, | ||
pi_image_handle * | ret_handle | ||
) |
API to create sampled bindless image handles.
context | is the pi_context |
device | is the pi_device |
img_mem | is the handle to memory from which to create the image |
image_format | format of the image (channel order and data type) |
image_desc | image descriptor |
sampler | is the pi_sampler |
ret_mem | is the returning pi_mem image object |
ret_handle | is the returning memory handle to newly allocated memory |
Definition at line 391 of file pi_cuda.cpp.
pi_result piextMemSampledImageHandleDestroy | ( | pi_context | context, |
pi_device | device, | ||
pi_image_handle | handle | ||
) |
API to destroy bindless sampled image handles.
context | is the pi_context |
device | is the pi_device |
handle | is the image handle |
Definition at line 445 of file pi_cuda.cpp.
pi_result piextMemUnsampledImageCreate | ( | pi_context | context, |
pi_device | device, | ||
pi_image_mem_handle | img_mem, | ||
pi_image_format * | image_format, | ||
pi_image_desc * | image_desc, | ||
pi_mem * | ret_mem, | ||
pi_image_handle * | ret_handle | ||
) |
API to create bindless image handles.
context | is the pi_context |
device | is the pi_device |
img_mem | is the handle to memory from which to create the image |
image_format | format of the image (channel order and data type) |
image_desc | image descriptor |
ret_mem | is the returning pi_mem image object |
ret_handle | is the returning memory handle to newly allocated memory |
Definition at line 383 of file pi_cuda.cpp.
pi_result piextMemUnsampledImageHandleDestroy | ( | pi_context | context, |
pi_device | device, | ||
pi_image_handle | handle | ||
) |
API to destroy bindless unsampled image handles.
context | is the pi_context |
device | is the pi_device |
handle | is the image handle |
Definition at line 440 of file pi_cuda.cpp.
pi_result piextPeerAccessGetInfo | ( | pi_device | command_device, |
pi_device | peer_device, | ||
pi_peer_attr | attr, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 1264 of file pi_cuda.cpp.
pi_result piextPlatformCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_platform * | platform | ||
) |
Creates PI platform object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI platform from. |
platform | is the PI platform created from the native handle. |
Definition at line 47 of file pi_cuda.cpp.
pi_result piextPlatformGetNativeHandle | ( | pi_platform | platform, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI platform object.
platform | is the PI platform to get the native handle of. |
nativeHandle | is the native handle of platform. |
Definition at line 42 of file pi_cuda.cpp.
pi_result piextPluginGetOpaqueData | ( | void * | opaque_data_param, |
void ** | opaque_data_return | ||
) |
API to get Plugin internal data, opaque to SYCL RT.
Some devices whose device code is compiled by the host compiler (e.g. CPU emulators) may use it to access some device code functionality implemented in/behind the plugin.
opaque_data_param | - unspecified argument, interpretation is specific to a plugin |
opaque_data_return | - placeholder for the returned opaque data. |
Definition at line 1238 of file pi_cuda.cpp.
pi_result piextProgramCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
pi_context | context, | ||
bool | pluginOwnsNativeHandle, | ||
pi_program * | program | ||
) |
Creates PI program object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI program from. |
context | is the PI context of the program. |
pluginOwnsNativeHandle | Indicates whether the created PI object should take ownership of the native handle. |
program | is the PI program created from the native handle. |
Definition at line 333 of file pi_cuda.cpp.
pi_result piextProgramGetNativeHandle | ( | pi_program | program, |
pi_native_handle * | nativeHandle | ||
) |
Gets the native handle of a PI program object.
program | is the PI program to get the native handle of. |
nativeHandle | is the native handle of program. |
Definition at line 328 of file pi_cuda.cpp.
pi_result piextProgramSetSpecializationConstant | ( | pi_program | prog, |
pi_uint32 | spec_id, | ||
size_t | spec_size, | ||
const void * | spec_value | ||
) |
Sets a specialization constant to a specific value.
Note: Only used when specialization constants are natively supported (SPIR-V binaries), and not when they are emulated (AOT binaries).
prog | the program object which will use the value |
spec_id | integer ID of the constant |
spec_size | size of the value |
spec_value | bytes of the value |
Definition at line 1060 of file pi_cuda.cpp.
pi_result piextQueueCreate | ( | pi_context | context, |
pi_device | device, | ||
pi_queue_properties * | properties, | ||
pi_queue * | queue | ||
) |
properties | points to a zero-terminated array of extra data describing desired queue properties. Format is {[PROPERTY[, property-specific elements of data]*,]* 0} |
Definition at line 167 of file pi_cuda.cpp.
Referenced by piQueueCreate().
pi_result piextQueueCreateWithNativeHandle | ( | pi_native_handle | nativeHandle, |
int32_t | nativeHandleDesc, | ||
pi_context | context, | ||
pi_device | device, | ||
bool | pluginOwnsNativeHandle, | ||
pi_queue_properties * | Properties, | ||
pi_queue * | queue | ||
) |
Creates PI queue object from a native handle.
NOTE: The created PI object takes ownership of the native handle.
nativeHandle | is the native handle to create PI queue from. |
nativeHandleDesc | provides additional properties of the native handle. |
context | is the PI context of the queue. |
device | is the PI device associated with the native device used when creating the native queue. This parameter is optional but some backends may fail to create the right PI queue if omitted. |
pluginOwnsNativeHandle | Indicates whether the created PI object should take ownership of the native handle. |
Properties | holds queue properties. |
queue | is the PI queue created from the native handle. |
Definition at line 198 of file pi_cuda.cpp.
pi_result piextQueueGetNativeHandle | ( | pi_queue | queue, |
pi_native_handle * | nativeHandle, | ||
int32_t * | nativeHandleDesc | ||
) |
Gets the native handle of a PI queue object.
queue | is the PI queue to get the native handle of. |
nativeHandle | is the native handle of queue or commandlist. |
nativeHandleDesc | provides additional properties of the native handle. |
Definition at line 190 of file pi_cuda.cpp.
pi_result piextSignalExternalSemaphore | ( | pi_queue | command_queue, |
pi_interop_semaphore_handle | sem_handle, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
API to instruct the queue to signal the external semaphore handle once all previous commands have completed execution.
command_queue | is the queue instructed to signal |
sem_handle | is the interop semaphore handle to signal |
num_events_in_wait_list | is the number of events in the wait list |
event_wait_list | is the list of events to wait on before this operation |
event | is the returned event representing this operation |
Definition at line 500 of file pi_cuda.cpp.
pi_result piextUSMDeviceAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
pi_device | device, | ||
pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
pi_uint32 | alignment | ||
) |
Allocates device memory.
result_ptr | contains the allocated memory |
context | is the pi_context |
device | is the device the memory will be allocated on |
properties | are optional allocation properties |
size | is the size of the allocation |
alignment | is the desired alignment of the allocation |
Definition at line 860 of file pi_cuda.cpp.
pi_result piextUSMEnqueueFill2D | ( | pi_queue | queue, |
void * | ptr, | ||
size_t | pitch, | ||
size_t | pattern_size, | ||
const void * | pattern, | ||
size_t | width, | ||
size_t | height, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
USM 2D fill API.
queue | is the queue to submit to |
ptr | is the ptr to fill |
pitch | is the total width of the destination memory including padding |
pattern | is a pointer with the bytes of the pattern to set |
pattern_size | is the size in bytes of the pattern |
width | is width in bytes of each row to fill |
height | is the height of the columns to fill |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 941 of file pi_cuda.cpp.
pi_result piextUSMEnqueueMemAdvise | ( | pi_queue | queue, |
const void * | ptr, | ||
size_t | length, | ||
pi_mem_advice | advice, | ||
pi_event * | event | ||
) |
USM Memadvise API.
queue | is the queue to submit to |
ptr | is the data to be advised |
length | is the size in bytes of the memory to advise |
advice | is device specific advice |
event | is the event that represents this operation |
Definition at line 934 of file pi_cuda.cpp.
pi_result piextUSMEnqueueMemcpy | ( | pi_queue | queue, |
pi_bool | blocking, | ||
void * | dst_ptr, | ||
const void * | src_ptr, | ||
size_t | size, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
USM Memcpy API.
queue | is the queue to submit to |
blocking | is whether this operation should block the host |
src_ptr | is the data to be copied |
dst_ptr | is the location the data will be copied |
size | is number of bytes to copy |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 913 of file pi_cuda.cpp.
pi_result piextUSMEnqueueMemcpy2D | ( | pi_queue | queue, |
pi_bool | blocking, | ||
void * | dst_ptr, | ||
size_t | dst_pitch, | ||
const void * | src_ptr, | ||
size_t | src_pitch, | ||
size_t | width, | ||
size_t | height, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
USM 2D Memcpy API.
queue | is the queue to submit to |
blocking | is whether this operation should block the host |
dst_ptr | is the location the data will be copied |
dst_pitch | is the total width of the destination memory including padding |
src_ptr | is the data to be copied |
src_pitch | is the total width of the source memory including padding |
width | is width in bytes of each row to be copied |
height | is the height of the columns to be copied |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 966 of file pi_cuda.cpp.
pi_result piextUSMEnqueueMemset | ( | pi_queue | queue, |
void * | ptr, | ||
pi_int32 | value, | ||
size_t | count, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
USM Memset API.
queue | is the queue to submit to |
ptr | is the ptr to memset |
value | is value to set. It is interpreted as an 8-bit value and the upper 24 bits are ignored |
count | is the size in bytes to memset |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 905 of file pi_cuda.cpp.
pi_result piextUSMEnqueueMemset2D | ( | pi_queue | queue, |
void * | ptr, | ||
size_t | pitch, | ||
int | value, | ||
size_t | width, | ||
size_t | height, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
USM 2D Memset API.
queue | is the queue to submit to |
ptr | is the ptr to fill |
pitch | is the total width of the destination memory including padding |
value | is the value to fill into the region in ptr |
width | is width in bytes of each row to fill |
height | is the height of the columns to fill |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 954 of file pi_cuda.cpp.
pi_result piextUSMEnqueuePrefetch | ( | pi_queue | queue, |
const void * | ptr, | ||
size_t | size, | ||
pi_usm_migration_flags | flags, | ||
pi_uint32 | num_events_in_waitlist, | ||
const pi_event * | events_waitlist, | ||
pi_event * | event | ||
) |
Hint to migrate memory to the device.
queue | is the queue to submit to |
ptr | points to the memory to migrate |
size | is the number of bytes to migrate |
flags | is a bitfield used to specify memory migration options |
num_events_in_waitlist | is the number of events to wait on |
events_waitlist | is an array of events to wait on |
event | is the event that represents this operation |
Definition at line 924 of file pi_cuda.cpp.
pi_result piextUSMFree | ( | pi_context | context, |
void * | ptr | ||
) |
Indicates that the allocated USM memory is no longer needed on the runtime side.
The actual freeing of the memory may be done in a blocking or deferred manner, e.g. to avoid issues with indirect memory access from kernels.
context | is the pi_context of the allocation |
ptr | is the memory to be freed |
Definition at line 895 of file pi_cuda.cpp.
pi_result piextUSMGetMemAllocInfo | ( | pi_context | context, |
const void * | ptr, | ||
pi_mem_alloc_info | param_name, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
API to query information about USM allocated pointers. Valid queries: PI_MEM_ALLOC_TYPE returns the host/device/shared pi_host_usm value. PI_MEM_ALLOC_BASE_PTR returns the base pointer of an allocation if the queried pointer fell inside an allocation; the result must fit in void *.
PI_MEM_ALLOC_SIZE returns how big the queried pointer's allocation is in bytes; the result is a size_t. PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against.
context | is the pi_context |
ptr | is the pointer to query |
param_name | is the type of query to perform |
param_value_size | is the size of the result in bytes |
param_value | is the result |
param_value_size_ret | is how many bytes were written |
Definition at line 977 of file pi_cuda.cpp.
pi_result piextUSMHostAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
pi_uint32 | alignment | ||
) |
Allocates host memory accessible by the device.
result_ptr | contains the allocated memory |
context | is the pi_context |
properties | are optional allocation properties |
size | is the size of the allocation |
alignment | is the desired alignment of the allocation |
Definition at line 888 of file pi_cuda.cpp.
pi_result piextUSMImport | ( | const void * | ptr, |
size_t | size, | ||
pi_context | context | ||
) |
Import host system memory into USM.
ptr | start address of memory range to import |
size | is the number of bytes to import |
context | is the pi_context |
Definition at line 985 of file pi_cuda.cpp.
pi_result piextUSMPitchedAlloc | ( | void ** | result_ptr, |
size_t * | result_pitch, | ||
pi_context | context, | ||
pi_device | device, | ||
pi_usm_mem_properties * | properties, | ||
size_t | width_in_bytes, | ||
size_t | height, | ||
unsigned int | element_size_bytes | ||
) |
Allocates memory accessible on device.
result_ptr | contains the allocated memory |
result_pitch | contains the returned memory pitch |
context | is the pi_context |
device | is the device the memory will be allocated on |
properties | are optional allocation properties |
width_in_bytes | is the width of the allocation in bytes |
height | is the height of the allocation in rows |
element_size_bytes | is the size in bytes of an element in the allocation |
Definition at line 878 of file pi_cuda.cpp.
pi_result piextUSMRelease | ( | const void * | ptr, |
pi_context | context | ||
) |
Release host system memory from USM.
ptr | start address of imported memory range |
context | is the pi_context |
Definition at line 989 of file pi_cuda.cpp.
pi_result piextUSMSharedAlloc | ( | void ** | result_ptr, |
pi_context | context, | ||
pi_device | device, | ||
pi_usm_mem_properties * | properties, | ||
size_t | size, | ||
pi_uint32 | alignment | ||
) |
Allocates memory accessible on both host and device.
result_ptr | contains the allocated memory |
context | is the pi_context |
device | is the device the memory will be allocated on |
properties | are optional allocation properties |
size | is the size of the allocation |
alignment | is the desired alignment of the allocation |
Definition at line 869 of file pi_cuda.cpp.
pi_result piextWaitExternalSemaphore | ( | pi_queue | command_queue, |
pi_interop_semaphore_handle | sem_handle, | ||
pi_uint32 | num_events_in_wait_list, | ||
const pi_event * | event_wait_list, | ||
pi_event * | event | ||
) |
API to instruct the queue with a non-blocking wait on an external semaphore.
command_queue | is the queue instructed to wait |
sem_handle | is the interop semaphore handle |
num_events_in_wait_list | is the number of events in the wait list |
event_wait_list | is the list of events to wait on before this operation |
event | is the returned event representing this operation |
Definition at line 492 of file pi_cuda.cpp.
Queries device for its global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned.
Device | device to query for timestamp |
DeviceTime | pointer to store device timestamp in nanoseconds. Optional argument, can be nullptr |
HostTime | pointer to store host timestamp in nanoseconds. Optional argument, can be nullptr in which case timestamp will not be written |
Definition at line 1247 of file pi_cuda.cpp.
pi_result piKernelCreate | ( | pi_program | Program, |
const char * | KernelName, | ||
pi_kernel * | RetKernel | ||
) |
Definition at line 341 of file pi_cuda.cpp.
pi_result piKernelGetGroupInfo | ( | pi_kernel | Kernel, |
pi_device | Device, | ||
pi_kernel_group_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 508 of file pi_cuda.cpp.
pi_result piKernelGetInfo | ( | pi_kernel | Kernel, |
pi_kernel_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 366 of file pi_cuda.cpp.
pi_result piKernelGetSubGroupInfo | ( | pi_kernel | kernel, |
pi_device | device, | ||
pi_kernel_sub_group_info | param_name, | ||
size_t | input_value_size, | ||
const void * | input_value, | ||
size_t | param_value_size, | ||
void * | param_value, | ||
size_t * | param_value_size_ret | ||
) |
API to query information from the sub-group from a kernel.
kernel | is the pi_kernel to query |
device | is the device the kernel is executed on |
param_name | is a pi_kernel_sub_group_info enum value that specifies the information queried for. |
input_value_size | is the size of input value passed in ptr input_value param |
input_value | is the ptr to the input value passed. |
param_value_size | is the size of the value in bytes. |
param_value | is a pointer to the value to set. |
param_value_size_ret | is a pointer to return the size of data in param_value ptr. |
All queries expect a return of 4 bytes in param_value_size, param_value_size_ret, and a uint32_t value should be written in param_value ptr. Note: This behaviour differs from OpenCL. OpenCL returns size_t.
Definition at line 516 of file pi_cuda.cpp.
Definition at line 531 of file pi_cuda.cpp.
Definition at line 526 of file pi_cuda.cpp.
pi_result piKernelSetArg | ( | pi_kernel | Kernel, |
pi_uint32 | ArgIndex, | ||
size_t | ArgSize, | ||
const void * | ArgValue | ||
) |
Definition at line 347 of file pi_cuda.cpp.
pi_result piKernelSetExecInfo | ( | pi_kernel | kernel, |
pi_kernel_exec_info | value_name, | ||
size_t | param_value_size, | ||
const void * | param_value | ||
) |
API to set attributes controlling kernel execution.
kernel | is the pi kernel to execute |
value_name | is a pi_kernel_exec_info value that specifies the info passed to the kernel |
param_value_size | is the size of the value in bytes |
param_value | is a pointer to the value to set for the kernel |
If value_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to the pi_bool value PI_TRUE. If value_name is PI_USM_PTRS, the value will be an array of ptrs.
Definition at line 1053 of file pi_cuda.cpp.
pi_result piMemBufferCreate | ( | pi_context | Context, |
pi_mem_flags | Flags, | ||
size_t | Size, | ||
void * | HostPtr, | ||
pi_mem * | RetMem, | ||
const pi_mem_properties * | properties | ||
) |
Definition at line 210 of file pi_cuda.cpp.
pi_result piMemBufferPartition | ( | pi_mem | Buffer, |
pi_mem_flags | Flags, | ||
pi_buffer_create_type | BufferCreateType, | ||
void * | BufferCreateInfo, | ||
pi_mem * | RetMem | ||
) |
Definition at line 836 of file pi_cuda.cpp.
pi_result piMemGetInfo | ( | pi_mem | Mem, |
pi_mem_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 217 of file pi_cuda.cpp.
pi_result piMemImageCreate | ( | pi_context | Context, |
pi_mem_flags | Flags, | ||
const pi_image_format * | ImageFormat, | ||
const pi_image_desc * | ImageDesc, | ||
void * | HostPtr, | ||
pi_mem * | RetImage | ||
) |
Definition at line 227 of file pi_cuda.cpp.
pi_result piMemImageGetInfo | ( | pi_mem | Image, |
pi_image_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 781 of file pi_cuda.cpp.
Definition at line 225 of file pi_cuda.cpp.
Definition at line 223 of file pi_cuda.cpp.
pi_result piPlatformGetInfo | ( | pi_platform | Platform, |
pi_platform_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 35 of file pi_cuda.cpp.
pi_result piPlatformsGet | ( | pi_uint32 | NumEntries, |
pi_platform * | Platforms, | ||
pi_uint32 * | NumPlatforms | ||
) |
Definition at line 30 of file pi_cuda.cpp.
pi_result piPluginGetBackendOption | ( | pi_platform | platform, |
const char * | frontend_option, | ||
const char ** | backend_option | ||
) |
API to get backend specific option.
frontend_option | is a string that contains frontend option. |
backend_option | is used to return the backend option corresponding to frontend option. |
Definition at line 56 of file pi_cuda.cpp.
pi_result piPluginGetLastError | ( | char ** | message | ) |
API to get Plugin specific warning and error messages.
message | is a returned address to the first element in the message. The plugin owns the error message string. The string is thread-local. As a result, different threads may return different errors. A message is overwritten by the following error or warning that is produced within the given thread. The memory is cleaned up at the end of the thread's lifetime. |
device,and | synchronized host timestamp |
Definition at line 52 of file pi_cuda.cpp.
Definition at line 1275 of file pi_cuda.cpp.
pi_result piProgramBuild | ( | pi_program | Program, |
pi_uint32 | NumDevices, | ||
const pi_device * | DeviceList, | ||
const char * | Options, | ||
void(*)(pi_program Program, void *UserData) | PFnNotify, | ||
void * | UserData | ||
) |
Definition at line 302 of file pi_cuda.cpp.
References pi2ur::piProgramBuild().
pi_result piProgramCompile | ( | pi_program | Program, |
pi_uint32 | NumDevices, | ||
const pi_device * | DeviceList, | ||
const char * | Options, | ||
pi_uint32 | NumInputHeaders, | ||
const pi_program * | InputHeaders, | ||
const char ** | HeaderIncludeNames, | ||
void(*)(pi_program Program, void *UserData) | PFnNotify, | ||
void * | UserData | ||
) |
Definition at line 291 of file pi_cuda.cpp.
References pi2ur::piProgramCompile().
pi_result piProgramCreate | ( | pi_context | Context, |
const void * | ILBytes, | ||
size_t | Length, | ||
pi_program * | Program | ||
) |
Definition at line 248 of file pi_cuda.cpp.
pi_result piProgramCreateWithBinary | ( | pi_context | context, |
pi_uint32 | num_devices, | ||
const pi_device * | device_list, | ||
const size_t * | lengths, | ||
const unsigned char ** | binaries, | ||
size_t | num_metadata_entries, | ||
const pi_device_binary_property * | metadata, | ||
pi_int32 * | binary_status, | ||
pi_program * | ret_program | ||
) |
Creates a PI program for a context and loads the given binary into it.
context | is the PI context to associate the program with. |
num_devices | is the number of devices in device_list. |
device_list | is a pointer to a list of devices. These devices must all be in context. |
lengths | is an array of sizes in bytes of the binary in binaries. |
binaries | is a pointer to a list of program binaries. |
num_metadata_entries | is the number of metadata entries in metadata. |
metadata | is a pointer to a list of program metadata entries. The use of metadata entries is backend-defined. |
binary_status | returns whether the program binary was loaded successfully or not, for each device in device_list. binary_status is ignored if it is null and otherwise it must be an array of num_devices elements. |
ret_program | is the PI program created from the program binaries. |
Definition at line 253 of file pi_cuda.cpp.
pi_result piProgramGetBuildInfo | ( | pi_program | Program, |
pi_device | Device, | ||
pi_program_build_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 310 of file pi_cuda.cpp.
pi_result piProgramGetInfo | ( | pi_program | Program, |
pi_program_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 272 of file pi_cuda.cpp.
pi_result piProgramLink | ( | pi_context | Context, |
pi_uint32 | NumDevices, | ||
const pi_device * | DeviceList, | ||
const char * | Options, | ||
pi_uint32 | NumInputPrograms, | ||
const pi_program * | InputPrograms, | ||
void(*)(pi_program Program, void *UserData) | PFnNotify, | ||
void * | UserData, | ||
pi_program * | RetProgram | ||
) |
Definition at line 280 of file pi_cuda.cpp.
References pi2ur::piProgramLink().
pi_result piProgramRelease | ( | pi_program | Program | ) |
Definition at line 324 of file pi_cuda.cpp.
pi_result piProgramRetain | ( | pi_program | Program | ) |
Definition at line 320 of file pi_cuda.cpp.
pi_result piQueueCreate | ( | pi_context | Context, |
pi_device | Device, | ||
pi_queue_properties | Flags, | ||
pi_queue * | Queue | ||
) |
Definition at line 161 of file pi_cuda.cpp.
Definition at line 186 of file pi_cuda.cpp.
Definition at line 188 of file pi_cuda.cpp.
pi_result piQueueGetInfo | ( | pi_queue | Queue, |
pi_queue_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 172 of file pi_cuda.cpp.
Definition at line 182 of file pi_cuda.cpp.
Definition at line 180 of file pi_cuda.cpp.
pi_result piSamplerCreate | ( | pi_context | Context, |
const pi_sampler_properties * | SamplerProperties, | ||
pi_sampler * | RetSampler | ||
) |
Definition at line 635 of file pi_cuda.cpp.
pi_result piSamplerGetInfo | ( | pi_sampler | Sampler, |
pi_sampler_info | ParamName, | ||
size_t | ParamValueSize, | ||
void * | ParamValue, | ||
size_t * | ParamValueSizeRet | ||
) |
Definition at line 641 of file pi_cuda.cpp.
pi_result piSamplerRelease | ( | pi_sampler | Sampler | ) |
Definition at line 653 of file pi_cuda.cpp.
pi_result piSamplerRetain | ( | pi_sampler | Sampler | ) |
Definition at line 649 of file pi_cuda.cpp.
pi_result piTearDown | ( | void * | PluginParameter | ) |
API to notify that the plugin should clean up its resources.
No PI calls should be made until the next piPluginInit call.
PluginParameter | placeholder for future use, currently not used. |
Definition at line 1243 of file pi_cuda.cpp.
const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING |
Definition at line 1273 of file pi_cuda.cpp.
Referenced by piPluginInit().