DPC++ Runtime
Runtime libraries for oneAPI DPC++
|
|
Go to the documentation of this file.
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
32 #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \
33 if (err != CL_SUCCESS) { \
36 return cast<pi_result>(reterr); \
42 template <
class To,
class From> To
cast(From value) {
44 static_assert(
sizeof(From) ==
sizeof(To),
"cast failed size check");
// Takes the first CONSTFIX definition on GCC older than 7.2
// (major version < 7, or exactly 7 with minor version < 2).
49 #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2))
50 #define CONSTFIX constexpr
52 #define CONSTFIX const
62 "clCreateBufferWithPropertiesINTEL";
68 "clSetProgramSpecializationConstant";
70 "clGetDeviceFunctionPointerINTEL";
94 template <const
char *FuncName,
typename T>
98 thread_local
static std::map<pi_context, T> FuncPtrs;
101 if (
auto F = FuncPtrs[
context]) {
110 clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_NUM_DEVICES,
111 sizeof(
cl_uint), &deviceCount,
nullptr);
113 if (ret_err != CL_SUCCESS || deviceCount < 1) {
117 std::vector<cl_device_id> devicesInCtx(deviceCount);
118 ret_err = clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_DEVICES,
119 deviceCount *
sizeof(cl_device_id),
120 devicesInCtx.data(),
nullptr);
122 if (ret_err != CL_SUCCESS) {
126 cl_platform_id curPlatform;
127 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
128 sizeof(cl_platform_id), &curPlatform,
nullptr);
130 if (ret_err != CL_SUCCESS) {
135 (
T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
146 return cast<pi_result>(ret_err);
157 clHostMemAllocINTEL_fn HFunc =
nullptr;
158 clSharedMemAllocINTEL_fn SFunc =
nullptr;
159 clDeviceMemAllocINTEL_fn DFunc =
nullptr;
160 cl_context CLContext;
161 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(
kernel), CL_KERNEL_CONTEXT,
162 sizeof(cl_context), &CLContext,
nullptr);
163 if (CLErr != CL_SUCCESS) {
164 return cast<pi_result>(CLErr);
167 getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
168 cast<pi_context>(CLContext), &HFunc);
170 clSetKernelExecInfo(cast<cl_kernel>(
kernel),
171 CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
175 getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
176 cast<pi_context>(CLContext), &DFunc);
178 clSetKernelExecInfo(cast<cl_kernel>(
kernel),
179 CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
183 getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
184 cast<pi_context>(CLContext), &SFunc);
186 clSetKernelExecInfo(cast<cl_kernel>(
kernel),
187 CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
196 size_t paramValueSize,
void *paramValue,
197 size_t *paramValueSizeRet) {
220 cl_bool result = clGetDeviceInfo(
221 cast<cl_device_id>(
device), CL_DEVICE_EXTENSIONS, 0,
nullptr, &extSize);
222 std::string extStr(extSize,
'\0');
223 result = clGetDeviceInfo(cast<cl_device_id>(
device), CL_DEVICE_EXTENSIONS,
224 extSize, &extStr.front(),
nullptr);
225 if (extStr.find(
"cl_khr_int64_base_atomics") == std::string::npos ||
226 extStr.find(
"cl_khr_int64_extended_atomics") == std::string::npos)
239 cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
240 cl_int res = clGetDeviceInfo(cast<cl_device_id>(
device), CL_DEVICE_TYPE,
241 sizeof(cl_device_type), &devType,
nullptr);
245 cl_bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
254 if (paramValueSizeRet)
255 *paramValueSizeRet = paramValueSize;
257 size_t *out = cast<size_t *>(paramValue);
258 if (paramValueSize >=
sizeof(
size_t))
260 if (paramValueSize >= 2 *
sizeof(
size_t))
262 if (paramValueSize >= 3 *
sizeof(
size_t))
268 cl_int result = clGetDeviceInfo(
269 cast<cl_device_id>(
device), cast<cl_device_info>(paramName),
270 paramValueSize, paramValue, paramValueSizeRet);
277 cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
278 cast<cl_platform_id *>(platforms),
279 cast<cl_uint *>(num_platforms));
282 if (result == CL_PLATFORM_NOT_FOUND_KHR) {
283 assert(num_platforms != 0);
293 assert(nativeHandle);
301 cl_int result = clGetDeviceIDs(
303 cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
304 cast<cl_uint *>(num_devices));
307 if (result == CL_DEVICE_NOT_FOUND) {
308 assert(num_devices != 0);
312 return cast<pi_result>(result);
333 const char *image_target =
nullptr;
338 clGetDeviceInfo(cast<cl_device_id>(
device), CL_DEVICE_TYPE,
340 if (ret_err != CL_SUCCESS) {
341 *selected_image_ind = invalid_ind;
342 return cast<pi_result>(ret_err);
352 case CL_DEVICE_TYPE_CPU:
355 case CL_DEVICE_TYPE_GPU:
358 case CL_DEVICE_TYPE_ACCELERATOR:
370 for (
pi_uint32 i = 0; i < num_images; ++i) {
371 if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
372 *selected_image_ind = i;
375 if (strcmp(images[i]->DeviceTargetSpec,
380 if ((*selected_image_ind = fallback) != invalid_ind)
388 assert(piDevice !=
nullptr);
389 *piDevice =
reinterpret_cast<pi_device>(nativeHandle);
395 assert(
queue &&
"piQueueCreate failed, queue argument is null");
397 cl_platform_id curPlatform;
399 clGetDeviceInfo(cast<cl_device_id>(
device), CL_DEVICE_PLATFORM,
400 sizeof(cl_platform_id), &curPlatform,
nullptr);
405 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, 0,
nullptr,
410 std::string platVer(platVerSize,
'\0');
411 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, platVerSize,
412 &platVer.front(),
nullptr);
416 if (platVer.find(
"OpenCL 1.0") != std::string::npos ||
417 platVer.find(
"OpenCL 1.1") != std::string::npos ||
418 platVer.find(
"OpenCL 1.2") != std::string::npos) {
419 *
queue = cast<pi_queue>(clCreateCommandQueue(
421 cast<cl_command_queue_properties>(properties), &ret_err));
422 return cast<pi_result>(ret_err);
425 cl_queue_properties CreationFlagProperties[] = {
426 CL_QUEUE_PROPERTIES, cast<cl_command_queue_properties>(properties), 0};
427 *
queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
429 CreationFlagProperties, &ret_err));
430 return cast<pi_result>(ret_err);
435 bool ownNativeHandle,
437 (void)ownNativeHandle;
438 assert(piQueue !=
nullptr);
439 *piQueue =
reinterpret_cast<pi_queue>(nativeHandle);
440 clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
448 clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_NUM_DEVICES,
449 sizeof(
cl_uint), &deviceCount,
nullptr);
451 std::vector<cl_device_id> devicesInCtx(deviceCount);
453 if (ret_err != CL_SUCCESS || deviceCount < 1) {
454 if (res_program !=
nullptr)
455 *res_program =
nullptr;
456 return cast<pi_result>(CL_INVALID_CONTEXT);
459 ret_err = clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_DEVICES,
460 deviceCount *
sizeof(cl_device_id),
461 devicesInCtx.data(),
nullptr);
465 cl_platform_id curPlatform;
466 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
467 sizeof(cl_platform_id), &curPlatform,
nullptr);
472 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, 0,
nullptr,
474 std::string devVer(devVerSize,
'\0');
475 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, devVerSize,
476 &devVer.front(),
nullptr);
481 if (devVer.find(
"OpenCL 1.0") == std::string::npos &&
482 devVer.find(
"OpenCL 1.1") == std::string::npos &&
483 devVer.find(
"OpenCL 1.2") == std::string::npos &&
484 devVer.find(
"OpenCL 2.0") == std::string::npos) {
485 if (res_program !=
nullptr)
486 *res_program = cast<pi_program>(clCreateProgramWithIL(
487 cast<cl_context>(
context), il,
length, cast<cl_int *>(&err)));
492 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_EXTENSIONS, 0,
nullptr,
494 std::string extStr(extSize,
'\0');
495 ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_EXTENSIONS, extSize,
496 &extStr.front(),
nullptr);
498 if (ret_err != CL_SUCCESS ||
499 extStr.find(
"cl_khr_il_program") == std::string::npos) {
500 if (res_program !=
nullptr)
501 *res_program =
nullptr;
502 return cast<pi_result>(CL_INVALID_CONTEXT);
506 cl_program(CL_API_CALL *)(cl_context,
const void *, size_t,
cl_int *);
508 reinterpret_cast<apiFuncT
>(clGetExtensionFunctionAddressForPlatform(
509 curPlatform,
"clCreateProgramWithILKHR"));
511 assert(funcPtr !=
nullptr);
512 if (res_program !=
nullptr)
513 *res_program = cast<pi_program>(
514 funcPtr(cast<cl_context>(
context), il,
length, cast<cl_int *>(&err)));
524 assert(piProgram !=
nullptr);
525 *piProgram =
reinterpret_cast<pi_program>(nativeHandle);
539 for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
542 normalizedCoords =
static_cast<pi_bool>(sampler_properties[++i]);
549 assert(
false &&
"Cannot recognize sampler property");
554 *result_sampler = cast<pi_sampler>(
555 clCreateSampler(cast<cl_context>(
context), normalizedCoords,
556 addressingMode, filterMode, cast<cl_int *>(&error_code)));
561 const pi_mem *arg_value) {
562 return cast<pi_result>(
563 clSetKernelArg(cast<cl_kernel>(
kernel), cast<cl_uint>(arg_index),
564 sizeof(arg_value), cast<const cl_mem *>(arg_value)));
569 return cast<pi_result>(
570 clSetKernelArg(cast<cl_kernel>(
kernel), cast<cl_uint>(arg_index),
571 sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
577 assert(piKernel !=
nullptr);
578 *piKernel =
reinterpret_cast<pi_kernel>(nativeHandle);
587 const std::string &sub_str) {
590 for (
const auto &x : str) {
591 if (x == delimiter) {
592 if (str.substr(beg,
length) == sub_str)
602 if (str.substr(beg,
length) == sub_str)
609 cl_device_id
device, cl_program program,
const char *FuncName,
612 const char *func_name,
615 cl_context CLContext =
nullptr;
617 clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
618 sizeof(CLContext), &CLContext,
nullptr);
620 if (ret_err != CL_SUCCESS)
621 return cast<pi_result>(ret_err);
626 cast<pi_context>(CLContext), &FuncT);
635 *function_pointer_ret = 0;
640 if (Res != CL_SUCCESS)
641 return cast<pi_result>(Res);
643 std::string ClResult(Size,
' ');
646 ClResult.size(), &ClResult[0],
nullptr);
647 if (Res != CL_SUCCESS)
648 return cast<pi_result>(Res);
661 pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(
device),
662 cast<cl_program>(program), func_name,
663 function_pointer_ret));
667 if (pi_ret_err == CL_INVALID_ARG_VALUE) {
668 *function_pointer_ret = 0;
678 const void *private_info,
679 size_t cb,
void *user_data1),
682 *retcontext = cast<pi_context>(
683 clCreateContext(properties, cast<cl_uint>(num_devices),
684 cast<const cl_device_id *>(devices),
pfn_notify,
685 user_data, cast<cl_int *>(&ret)));
693 bool ownNativeHandle,
697 (void)ownNativeHandle;
698 assert(piContext !=
nullptr);
699 assert(ownNativeHandle ==
false);
700 *piContext =
reinterpret_cast<pi_context>(nativeHandle);
711 clCreateBufferWithPropertiesINTEL_fn FuncPtr =
nullptr;
714 clCreateBufferWithPropertiesINTEL_fn>(
717 *ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(
context), properties,
718 cast<cl_mem_flags>(flags), size,
host_ptr,
719 cast<cl_int *>(&ret_err)));
724 *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(
context),
725 cast<cl_mem_flags>(flags), size,
726 host_ptr, cast<cl_int *>(&ret_err)));
735 *ret_mem = cast<pi_mem>(
736 clCreateImage(cast<cl_context>(
context), cast<cl_mem_flags>(flags),
737 cast<const cl_image_format *>(image_format),
738 cast<const cl_image_desc *>(image_desc),
host_ptr,
739 cast<cl_int *>(&ret_err)));
746 void *buffer_create_info,
pi_mem *ret_mem) {
749 *ret_mem = cast<pi_mem>(
750 clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
751 cast<cl_buffer_create_type>(buffer_create_type),
752 buffer_create_info, cast<cl_int *>(&ret_err)));
758 bool ownNativeHandle,
pi_mem *piMem) {
760 (void)ownNativeHandle;
761 assert(piMem !=
nullptr);
762 *piMem =
reinterpret_cast<pi_mem>(nativeHandle);
767 const char **strings,
768 const size_t *lengths,
772 *ret_program = cast<pi_program>(
773 clCreateProgramWithSource(cast<cl_context>(
context), cast<cl_uint>(count),
774 strings, lengths, cast<cl_int *>(&ret_err)));
780 const size_t *lengths,
const unsigned char **binaries,
784 (void)num_metadata_entries;
787 *ret_program = cast<pi_program>(clCreateProgramWithBinary(
788 cast<cl_context>(
context), cast<cl_uint>(num_devices),
789 cast<const cl_device_id *>(device_list), lengths, binaries,
790 cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
795 const pi_device *device_list,
const char *options,
802 *ret_program = cast<pi_program>(
803 clLinkProgram(cast<cl_context>(
context), cast<cl_uint>(num_devices),
804 cast<const cl_device_id *>(device_list), options,
805 cast<cl_uint>(num_input_programs),
806 cast<const cl_program *>(input_programs),
808 cast<cl_int *>(&ret_err)));
816 *ret_kernel = cast<pi_kernel>(clCreateKernel(
817 cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
823 size_t param_value_size,
void *param_value,
824 size_t *param_value_size_ret) {
829 switch (param_name) {
833 cl_int result = clGetKernelWorkGroupInfo(
835 cast<cl_kernel_work_group_info>(param_name), param_value_size,
836 param_value, param_value_size_ret);
843 size_t input_value_size,
844 const void *input_value,
845 size_t param_value_size,
void *param_value,
846 size_t *param_value_size_ret) {
847 (void)param_value_size;
850 ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
852 cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
853 sizeof(
size_t), &ret_val, param_value_size_ret));
855 if (ret_err != CL_SUCCESS)
856 return cast<pi_result>(ret_err);
858 *(
static_cast<uint32_t *
>(param_value)) =
static_cast<uint32_t
>(ret_val);
859 if (param_value_size_ret)
860 *param_value_size_ret =
sizeof(uint32_t);
867 *ret_event = cast<pi_event>(
868 clCreateUserEvent(cast<cl_context>(
context), cast<cl_int *>(&ret_err)));
874 bool ownNativeHandle,
878 (void)ownNativeHandle;
880 assert(piEvent !=
nullptr);
881 assert(nativeHandle);
884 *piEvent =
reinterpret_cast<pi_event>(nativeHandle);
890 size_t offset,
size_t size,
896 *ret_map = cast<void *>(clEnqueueMapBuffer(
897 cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
898 cast<cl_bool>(blocking_map), map_flags, offset, size,
899 cast<cl_uint>(num_events_in_wait_list),
900 cast<const cl_event *>(event_wait_list), cast<cl_event *>(
event),
901 cast<cl_int *>(&ret_err)));
924 clHostMemAllocINTEL_fn FuncPtr =
nullptr;
925 RetVal = getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
929 Ptr = FuncPtr(cast<cl_context>(
context),
930 cast<cl_mem_properties_intel *>(properties), size, alignment,
931 cast<cl_int *>(&RetVal));
938 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0 &&
939 "allocation not aligned correctly");
961 clDeviceMemAllocINTEL_fn FuncPtr =
nullptr;
963 getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
967 Ptr = FuncPtr(cast<cl_context>(
context), cast<cl_device_id>(
device),
968 cast<cl_mem_properties_intel *>(properties), size, alignment,
969 cast<cl_int *>(&RetVal));
976 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0 &&
977 "allocation not aligned correctly");
999 clSharedMemAllocINTEL_fn FuncPtr =
nullptr;
1001 getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
1005 Ptr = FuncPtr(cast<cl_context>(
context), cast<cl_device_id>(
device),
1006 cast<cl_mem_properties_intel *>(properties), size, alignment,
1007 cast<cl_int *>(&RetVal));
1012 assert(alignment == 0 ||
1014 reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0));
1025 clMemBlockingFreeINTEL_fn FuncPtr =
nullptr;
1035 std::is_same<clMemBlockingFreeINTEL_fn, clMemFreeINTEL_fn>::value);
1038 clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_NUM_DEVICES,
1039 sizeof(
cl_uint), &deviceCount,
nullptr);
1041 if (ret_err != CL_SUCCESS || deviceCount < 1) {
1045 std::vector<cl_device_id> devicesInCtx(deviceCount);
1046 ret_err = clGetContextInfo(cast<cl_context>(
context), CL_CONTEXT_DEVICES,
1047 deviceCount *
sizeof(cl_device_id),
1048 devicesInCtx.data(),
nullptr);
1050 if (ret_err != CL_SUCCESS) {
1054 bool useBlockingFree =
true;
1055 for (
const cl_device_id &dev : devicesInCtx) {
1056 cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
1057 ret_err = clGetDeviceInfo(dev, CL_DEVICE_TYPE,
sizeof(cl_device_type),
1059 if (ret_err != CL_SUCCESS) {
1062 useBlockingFree &= devType == CL_DEVICE_TYPE_GPU;
1066 if (useBlockingFree)
1068 getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
1071 RetVal = getExtFuncFromContext<clMemFreeName, clMemFreeINTEL_fn>(
context,
1075 RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(
context), ptr));
1089 size_t arg_size,
const void *arg_value) {
1095 cl_context CLContext;
1096 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(
kernel), CL_KERNEL_CONTEXT,
1097 sizeof(cl_context), &CLContext,
nullptr);
1098 if (CLErr != CL_SUCCESS) {
1099 return cast<pi_result>(CLErr);
1102 clSetKernelArgMemPointerINTEL_fn FuncPtr =
nullptr;
1104 clSetKernelArgMemPointerINTEL_fn>(
1105 cast<pi_context>(CLContext), &FuncPtr);
1110 auto PtrToPtr =
reinterpret_cast<const intptr_t *
>(arg_value);
1111 auto DerefPtr =
reinterpret_cast<void *
>(*PtrToPtr);
1113 cast<pi_result>(FuncPtr(cast<cl_kernel>(
kernel), arg_index, DerefPtr));
1130 size_t count,
pi_uint32 num_events_in_waitlist,
1135 cl_context CLContext;
1137 clGetCommandQueueInfo(cast<cl_command_queue>(
queue), CL_QUEUE_CONTEXT,
1138 sizeof(cl_context), &CLContext,
nullptr);
1139 if (CLErr != CL_SUCCESS) {
1140 return cast<pi_result>(CLErr);
1143 clEnqueueMemsetINTEL_fn FuncPtr =
nullptr;
1145 getExtFuncFromContext<clEnqueueMemsetName, clEnqueueMemsetINTEL_fn>(
1146 cast<pi_context>(CLContext), &FuncPtr);
1149 RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(
queue), ptr, value,
1150 count, num_events_in_waitlist,
1151 cast<const cl_event *>(events_waitlist),
1152 cast<cl_event *>(
event)));
1169 const void *src_ptr,
size_t size,
1175 cl_context CLContext;
1177 clGetCommandQueueInfo(cast<cl_command_queue>(
queue), CL_QUEUE_CONTEXT,
1178 sizeof(cl_context), &CLContext,
nullptr);
1179 if (CLErr != CL_SUCCESS) {
1180 return cast<pi_result>(CLErr);
1183 clEnqueueMemcpyINTEL_fn FuncPtr =
nullptr;
1185 getExtFuncFromContext<clEnqueueMemcpyName, clEnqueueMemcpyINTEL_fn>(
1186 cast<pi_context>(CLContext), &FuncPtr);
1189 RetVal = cast<pi_result>(
1190 FuncPtr(cast<cl_command_queue>(
queue), blocking, dst_ptr, src_ptr, size,
1191 num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1192 cast<cl_event *>(
event)));
1219 return cast<pi_result>(clEnqueueMarkerWithWaitList(
1220 cast<cl_command_queue>(
queue), num_events_in_waitlist,
1221 cast<const cl_event *>(events_waitlist), cast<cl_event *>(
event)));
1264 return cast<pi_result>(
1265 clEnqueueMarkerWithWaitList(cast<cl_command_queue>(
queue), 0,
nullptr,
1266 reinterpret_cast<cl_event *
>(
event)));
1314 size_t param_value_size,
void *param_value,
1315 size_t *param_value_size_ret) {
1317 clGetMemAllocInfoINTEL_fn FuncPtr =
nullptr;
1319 getExtFuncFromContext<clGetMemAllocInfoName, clGetMemAllocInfoINTEL_fn>(
1323 RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(
context), ptr, param_name,
1324 param_value_size, param_value,
1325 param_value_size_ret));
1343 size_t param_value_size,
1344 const void *param_value) {
1349 return cast<pi_result>(clSetKernelExecInfo(
1350 cast<cl_kernel>(
kernel), param_name, param_value_size, param_value));
1355 cl_program program,
cl_uint spec_id,
size_t spec_size,
1356 const void *spec_value);
1361 const void *spec_value) {
1362 cl_program ClProg = cast<cl_program>(prog);
1363 cl_context Ctx =
nullptr;
1366 clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT,
sizeof(Ctx), &Ctx, &RetSize);
1368 if (Res != CL_SUCCESS)
1369 return cast<pi_result>(Res);
1373 decltype(F)>(cast<pi_context>(Ctx), &F);
1375 if (!F || Res != CL_SUCCESS)
1377 Res = F(ClProg, spec_id, spec_size, spec_value);
1378 return cast<pi_result>(Res);
1389 assert(nativeHandle !=
nullptr);
1432 (void)PluginParameter;
1438 if (CompareVersions < 0) {
1445 size_t PluginVersionSize =
sizeof(PluginInit->
PluginVersion);
1450 #define _PI_CL(pi_api, ocl_api) \
1451 (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *ret_program)
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data1), void *user_data, pi_context *retcontext)
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
pi_result piEventRelease(pi_event event)
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextPlatformGetNativeHandle(pi_platform platform, pi_native_handle *nativeHandle)
Gets the native handle of a PI platform object.
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
CONSTFIX char clGetMemAllocInfoName[]
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
pi_result piPluginInit(pi_plugin *PluginInit)
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, bool, pi_program *piProgram)
Creates PI program object from a native handle.
@ PI_DEVICE_INFO_GPU_EU_COUNT
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret)
Retrieves a device function pointer to a user-defined function.
CONSTFIX char clCreateBufferWithPropertiesName[]
@ PI_DEVICE_INFO_IMAGE_SRGB
pi_result piProgramRetain(pi_program program)
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
static bool is_in_separated_string(const std::string &str, char delimiter, const std::string &sub_str)
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
CONSTFIX char clSharedMemAllocName[]
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
intptr_t pi_context_properties
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_SAMPLER_INFO_FILTER_MODE
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piEventCreate(pi_context context, pi_event *ret_event)
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
@ PI_DEVICE_INFO_GPU_SLICES
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
pi_result piDeviceRetain(pi_device device)
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piQueueRelease(pi_queue command_queue)
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *piMem)
Creates PI mem object from a native handle.
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *piEvent)
Creates PI event object from a native handle.
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
_pi_sampler_addressing_mode
pi_result piKernelRelease(pi_kernel kernel)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
const char SupportedVersion[]
constexpr size_t MaxMessageSize
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
@ PI_SAMPLER_FILTER_MODE_NEAREST
Implementation of a PI Kernel for CUDA.
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
Creates PI context object from a native handle.
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform, pi_device *piDevice)
Creates PI device object from a native handle.
void memcpy(void *Dst, const void *Src, std::size_t Size)
CONSTFIX char clMemBlockingFreeName[]
CONSTFIX char clGetDeviceFunctionPointerName[]
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform *platform)
Creates PI platform object from a native handle.
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
float length(T p) __NOEXC
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
pi_result piContextRelease(pi_context context)
PI queue mapping on to CUstream objects.
pi_result piProgramRelease(pi_program program)
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemRelease(pi_mem mem)
This struct is a record of the device binary information.
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Returns requested info for the provided native device. Returns PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
CL_API_ENTRY cl_int(CL_API_CALL * clGetDeviceFunctionPointer_fn)(cl_device_id device, cl_program program, const char *FuncName, cl_ulong *ret_ptr)
CONSTFIX char clMemFreeName[]
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static pi_result piextGetNativeHandle(void *piObj, pi_native_handle *nativeHandle)
Common API for getting the native handle of a PI object.
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
CONSTFIX char clDeviceMemAllocName[]
thread_local pi_result ErrorMessageCode
multi_ptr< ElementType, access::address_space::global_host_space > host_ptr
@ PI_KERNEL_GROUP_INFO_NUM_REGS
thread_local char ErrorMessage[MaxMessageSize]
pi_result piQueueRetain(pi_queue command_queue)
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
static pi_result getExtFuncFromContext(pi_context context, T *fptr)
pi_result piKernelRetain(pi_kernel kernel)
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piSamplerRetain(pi_sampler sampler)
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
pi_bitfield pi_mem_properties
Implementation of PI Program on CUDA Module object.
Implementation of samplers for CUDA.
pi_result piDeviceRelease(pi_device device)
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
@ PI_DEVICE_INFO_PCI_ADDRESS
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
uintptr_t pi_native_handle
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
pi_result piQueueFinish(pi_queue command_queue)
pi_result piContextRetain(pi_context context)
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
pi_bitfield pi_sampler_properties
CONSTFIX char clEnqueueMemsetName[]
CL_API_ENTRY cl_int(CL_API_CALL * clSetProgramSpecializationConstant_fn)(cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value)
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
CONSTFIX char clHostMemAllocName[]
@ PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE
PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE indicates a fallback method determines the function exists but i...
@ PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
CONSTFIX char clSetKernelArgMemPointerName[]
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_PROGRAM_INFO_KERNEL_NAMES
static void setErrorMessage(const char *message, pi_result error_code)
static pi_result USMSetIndirectAccess(pi_kernel kernel)
Enables indirect access of pointers in kernels.
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
#define _PI_H_VERSION_STRING
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemRetain(pi_mem mem)
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM-allocated pointers. Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
pi_bitfield pi_queue_properties
@ PI_DEVICE_INFO_ATOMIC_64
pi_result piEventRetain(pi_event event)
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
PI Event mapping to CUevent.
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_program, bool, pi_kernel *piKernel)
Creates PI kernel object from a native handle.
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
pi_result piQueueFlush(pi_queue command_queue)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
pi_result piEnqueueNativeKernel(pi_queue queue, void(*user_func)(void *), void *args, size_t cb_args, pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
CONSTFIX char clSetProgramSpecializationConstantName[]
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_device, bool ownNativeHandle, pi_queue *piQueue)
Creates PI queue object from a native handle.
#define _PI_CL(pi_api, ocl_api)
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
void(* pfn_notify)(pi_event event, pi_int32 eventCommandStatus, void *userData)
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Sets up pointer arguments for CL kernels.
pi_result piSamplerRelease(pi_sampler sampler)
@ PI_SAMPLER_INFO_NORMALIZED_COORDS
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_bitfield pi_usm_mem_properties
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle)
Gets the native handle of a PI queue object.
CONSTFIX char clEnqueueMemcpyName[]
pi_result piextUSMFree(pi_context context, void *ptr)
Frees allocated USM memory.
PI context mapping to a CUDA context object.
PI device mapping to a CUdevice.
_pi_kernel_sub_group_info
@ PI_SAMPLER_INFO_ADDRESSING_MODE
@ PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_SAMPLER_ADDRESSING_MODE_CLAMP