DPC++ Runtime
Runtime libraries for oneAPI DPC++
|
|
Go to the documentation of this file.
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
33 #include <string_view>
36 #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \
37 if (err != CL_SUCCESS) { \
40 return cast<pi_result>(reterr); \
44 template <
class To,
class From> To
cast(From value) {
46 static_assert(
sizeof(From) ==
sizeof(To),
"cast failed size check");
51 #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2)) // true only for GCC older than 7.2
52 #define CONSTFIX constexpr
54 #define CONSTFIX const
63 "clCreateBufferWithPropertiesINTEL";
69 "clSetProgramSpecializationConstant";
71 "clGetDeviceFunctionPointerINTEL";
73 "clEnqueueWriteGlobalVariableINTEL";
75 "clEnqueueReadGlobalVariableINTEL";
103 const char **backend_option) {
104 using namespace std::literals;
105 if (frontend_option ==
nullptr)
106 return PI_ERROR_INVALID_VALUE;
107 if (frontend_option ==
""sv) {
108 *backend_option =
"";
112 if (!strcmp(frontend_option,
"-O0")) {
113 *backend_option =
"-cl-opt-disable";
116 if (frontend_option ==
"-O1"sv || frontend_option ==
"-O2"sv ||
117 frontend_option ==
"-O3"sv) {
118 *backend_option =
"";
121 if (frontend_option ==
"-ftarget-compile-fast"sv) {
122 *backend_option =
"-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
125 return PI_ERROR_INVALID_VALUE;
130 cl_int ret_err = CL_INVALID_VALUE;
132 size_t platVerSize = 0;
134 clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0,
nullptr, &platVerSize);
136 std::string platVer(platVerSize,
'\0');
137 ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize,
138 platVer.data(),
nullptr);
140 if (ret_err != CL_SUCCESS)
145 return CL_INVALID_PLATFORM;
151 cl_int ret_err = CL_INVALID_VALUE;
153 size_t devVerSize = 0;
154 ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0,
nullptr, &devVerSize);
156 std::string devVer(devVerSize,
'\0');
157 ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(),
160 if (ret_err != CL_SUCCESS)
165 return CL_INVALID_DEVICE;
171 const std::vector<std::string> &exts,
173 cl_int ret_err = CL_INVALID_VALUE;
176 ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0,
nullptr, &extSize);
178 std::string extStr(extSize,
'\0');
179 ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(),
182 if (ret_err != CL_SUCCESS)
186 for (
const std::string &ext : exts)
187 if (!(supported = (extStr.find(ext) != std::string::npos)))
194 cl_int(CL_API_CALL *)(cl_device_id
device, cl_program program,
198 cl_int(CL_API_CALL *)(cl_command_queue, cl_program,
const char *,
cl_bool,
199 size_t, size_t,
const void *,
cl_uint,
const cl_event *,
203 cl_int(CL_API_CALL *)(cl_command_queue, cl_program,
const char *,
cl_bool,
204 size_t, size_t,
void *,
cl_uint,
const cl_event *,
208 cl_int(CL_API_CALL *)(cl_program program,
cl_uint spec_id,
size_t spec_size,
209 const void *spec_value);
212 std::map<cl_context, T>
Map;
248 template <
typename T>
251 const char *FuncName, T *fptr) {
255 std::lock_guard<std::mutex> CacheLock{FPtrCache.
Mutex};
256 std::map<cl_context, T> &FPtrMap = FPtrCache.
Map;
257 auto It = FPtrMap.find(context);
258 if (It != FPtrMap.end()) {
263 return F ? PI_SUCCESS : PI_ERROR_INVALID_VALUE;
267 cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
268 sizeof(
cl_uint), &deviceCount,
nullptr);
270 if (ret_err != CL_SUCCESS || deviceCount < 1) {
271 return PI_ERROR_INVALID_CONTEXT;
274 std::vector<cl_device_id> devicesInCtx(deviceCount);
275 ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES,
276 deviceCount *
sizeof(cl_device_id),
277 devicesInCtx.data(),
nullptr);
279 if (ret_err != CL_SUCCESS) {
280 return PI_ERROR_INVALID_CONTEXT;
283 cl_platform_id curPlatform;
284 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
285 sizeof(cl_platform_id), &curPlatform,
nullptr);
287 if (ret_err != CL_SUCCESS) {
288 return PI_ERROR_INVALID_CONTEXT;
292 (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
296 FPtrMap[context] =
nullptr;
297 return PI_ERROR_INVALID_VALUE;
301 FPtrMap[context] = FuncPtr;
303 return cast<pi_result>(ret_err);
314 clHostMemAllocINTEL_fn HFunc =
nullptr;
315 clSharedMemAllocINTEL_fn SFunc =
nullptr;
316 clDeviceMemAllocINTEL_fn DFunc =
nullptr;
317 cl_context CLContext;
318 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
319 sizeof(cl_context), &CLContext,
nullptr);
320 if (CLErr != CL_SUCCESS) {
321 return cast<pi_result>(CLErr);
324 getExtFuncFromContext<clHostMemAllocINTEL_fn>(
328 clSetKernelExecInfo(cast<cl_kernel>(kernel),
329 CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
333 getExtFuncFromContext<clDeviceMemAllocINTEL_fn>(
337 clSetKernelExecInfo(cast<cl_kernel>(kernel),
338 CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
342 getExtFuncFromContext<clSharedMemAllocINTEL_fn>(
346 clSetKernelExecInfo(cast<cl_kernel>(kernel),
347 CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
356 size_t paramValueSize,
void *paramValue,
357 size_t *paramValueSizeRet) {
375 return PI_ERROR_INVALID_VALUE;
380 cl_device_id deviceID = cast<cl_device_id>(device);
382 if (ret_err != CL_SUCCESS) {
383 return cast<pi_result>(ret_err);
392 cl_device_atomic_capabilities cl_capabilities = 0;
393 cl_int ret_err = clGetDeviceInfo(
394 deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
395 sizeof(cl_device_atomic_capabilities), &cl_capabilities,
nullptr);
396 if (ret_err != CL_SUCCESS)
397 return cast<pi_result>(ret_err);
400 cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
401 CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
402 CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
403 cl_capabilities &= mask;
408 if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
411 if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
424 return static_cast<pi_result>(CL_INVALID_VALUE);
426 std::memcpy(paramValue, &capabilities,
sizeof(capabilities));
429 if (paramValueSizeRet)
430 *paramValueSizeRet =
sizeof(capabilities);
432 return static_cast<pi_result>(CL_SUCCESS);
446 cl_device_id deviceID = cast<cl_device_id>(device);
448 if (ret_err != CL_SUCCESS)
451 cl_device_atomic_capabilities devCapabilities = 0;
453 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
454 sizeof(cl_device_atomic_capabilities),
455 &devCapabilities,
nullptr);
456 if (ret_err != CL_SUCCESS)
458 assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
459 "Violates minimum mandated guarantee");
466 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
470 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
486 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
487 return PI_ERROR_INVALID_VALUE;
491 if (paramValueSizeRet)
492 *paramValueSizeRet =
sizeof(result);
504 cl_device_id deviceID = cast<cl_device_id>(device);
506 if (ret_err != CL_SUCCESS)
509 cl_device_atomic_capabilities devCapabilities = 0;
511 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
512 sizeof(cl_device_atomic_capabilities),
513 &devCapabilities,
nullptr);
514 if (ret_err != CL_SUCCESS)
516 assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) &&
517 "Violates minimum mandated guarantee");
518 assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) &&
519 "Violates minimum mandated guarantee");
523 if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
539 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
540 return PI_ERROR_INVALID_VALUE;
544 if (paramValueSizeRet)
545 *paramValueSizeRet =
sizeof(result);
560 cl_device_id deviceID = cast<cl_device_id>(device);
562 if (ret_err != CL_SUCCESS)
565 cl_device_atomic_capabilities devCapabilities = 0;
567 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
568 sizeof(cl_device_atomic_capabilities),
569 &devCapabilities,
nullptr);
570 if (ret_err != CL_SUCCESS)
572 assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
573 "Violates minimum mandated guarantee");
580 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
584 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
600 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
601 return PI_ERROR_INVALID_VALUE;
605 if (paramValueSizeRet)
606 *paramValueSizeRet =
sizeof(result);
610 cl_int ret_err = CL_SUCCESS;
612 bool supported =
false;
615 cast<cl_device_id>(device),
616 {
"cl_khr_int64_base_atomics",
"cl_khr_int64_extended_atomics"},
618 if (ret_err != CL_SUCCESS)
628 if (paramValueSize <
sizeof(result))
629 return PI_ERROR_INVALID_VALUE;
635 if (paramValueSize <
sizeof(result))
636 return PI_ERROR_INVALID_VALUE;
641 cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
642 cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
643 sizeof(cl_device_type), &devType,
nullptr);
647 bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
648 if (paramValueSize <
sizeof(result))
649 return PI_ERROR_INVALID_VALUE;
658 if (paramValueSizeRet)
659 *paramValueSizeRet = paramValueSize;
661 size_t *out = cast<size_t *>(paramValue);
662 if (paramValueSize >=
sizeof(
size_t))
664 if (paramValueSize >= 2 *
sizeof(
size_t))
666 if (paramValueSize >= 3 *
sizeof(
size_t))
680 if (err != CL_SUCCESS)
684 err = clGetDeviceInfo(cast<cl_device_id>(device),
685 cast<cl_device_info>(paramName), paramValueSize,
686 paramValue, paramValueSizeRet);
687 if (err != CL_SUCCESS)
690 if (paramValue && *
static_cast<cl_uint *
>(paramValue) == 0u) {
703 if (paramValue && paramValueSize <
sizeof(
cl_uint))
704 return static_cast<pi_result>(CL_INVALID_VALUE);
705 if (paramValueSizeRet)
706 *paramValueSizeRet =
sizeof(
cl_uint);
713 return static_cast<pi_result>(CL_SUCCESS);
717 const char *value =
"";
718 size_t valueSize = (strlen(value) + 1) *
sizeof(
char);
721 if (paramValueSizeRet !=
nullptr)
722 *paramValueSizeRet = valueSize;
726 cl_int ret_err = CL_SUCCESS;
728 if (paramValueSize <
sizeof(result))
729 return PI_ERROR_INVALID_VALUE;
730 bool supported =
false;
734 {
"cl_intel_mem_channel_property"}, supported);
735 if (ret_err != CL_SUCCESS)
744 if (paramValueSize <
sizeof(result))
745 return PI_ERROR_INVALID_VALUE;
746 cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
747 cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
748 sizeof(cl_device_type), &devType,
nullptr);
749 if (res != CL_SUCCESS)
754 sizeof(vendorId), &vendorId,
nullptr);
755 if (res != CL_SUCCESS)
758 result = devType == CL_DEVICE_TYPE_GPU && vendorId == 0x8086;
764 cl_int result = clGetDeviceInfo(
765 cast<cl_device_id>(device), cast<cl_device_info>(paramName),
766 paramValueSize, paramValue, paramValueSizeRet);
773 cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
774 cast<cl_platform_id *>(platforms),
775 cast<cl_uint *>(num_platforms));
778 if (result == CL_PLATFORM_NOT_FOUND_KHR) {
779 assert(num_platforms != 0);
787 size_t paramValueSize,
void *paramValue,
788 size_t *paramValueSizeRet) {
794 if (paramValueSize <
sizeof(result))
795 return PI_ERROR_INVALID_VALUE;
798 if (paramValueSizeRet)
799 *paramValueSizeRet =
sizeof(result);
803 cl_int result = clGetPlatformInfo(
804 cast<cl_platform_id>(platform), cast<cl_platform_info>(paramName),
805 paramValueSize, paramValue, paramValueSizeRet);
815 assert(nativeHandle);
816 *platform =
reinterpret_cast<pi_platform>(nativeHandle);
823 cl_int result = clGetDeviceIDs(
824 cast<cl_platform_id>(platform), cast<cl_device_type>(
device_type),
825 cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
826 cast<cl_uint *>(num_devices));
829 if (result == CL_DEVICE_NOT_FOUND) {
830 assert(num_devices != 0);
834 return cast<pi_result>(result);
860 clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
862 if (ret_err != CL_SUCCESS) {
863 *selected_image_ind = invalid_ind;
864 return cast<pi_result>(ret_err);
874 case CL_DEVICE_TYPE_CPU:
877 case CL_DEVICE_TYPE_GPU:
880 case CL_DEVICE_TYPE_ACCELERATOR:
892 for (
pi_uint32 i = 0; i < num_images; ++i) {
893 if (strcmp(images[i]->DeviceTargetSpec,
image_target) == 0) {
894 *selected_image_ind = i;
897 if (strcmp(images[i]->DeviceTargetSpec,
902 if ((*selected_image_ind = fallback) != invalid_ind)
905 return PI_ERROR_INVALID_BINARY;
910 assert(piDevice !=
nullptr);
911 *piDevice =
reinterpret_cast<pi_device>(nativeHandle);
921 return PI_ERROR_INVALID_VALUE;
924 assert(Properties[2] == 0);
925 if (Properties[2] != 0)
926 return PI_ERROR_INVALID_VALUE;
931 assert(queue &&
"piQueueCreate failed, queue argument is null");
933 cl_platform_id curPlatform;
935 clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_PLATFORM,
936 sizeof(cl_platform_id), &curPlatform,
nullptr);
941 assert(!(properties &
950 cl_command_queue_properties SupportByOpenCL =
951 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE |
952 CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT;
960 *queue = cast<pi_queue>(clCreateCommandQueue(
961 cast<cl_context>(context), cast<cl_device_id>(device),
962 cast<cl_command_queue_properties>(properties) & SupportByOpenCL,
964 return cast<pi_result>(ret_err);
967 cl_queue_properties CreationFlagProperties[] = {
969 cast<cl_command_queue_properties>(properties) & SupportByOpenCL, 0};
970 *queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
971 cast<cl_context>(context), cast<cl_device_id>(device),
972 CreationFlagProperties, &ret_err));
973 return cast<pi_result>(ret_err);
977 size_t param_value_size,
void *param_value,
978 size_t *param_value_size_ret) {
979 if (queue ==
nullptr) {
980 return PI_ERROR_INVALID_QUEUE;
983 switch (param_name) {
986 return PI_ERROR_INVALID_VALUE;
988 cl_int CLErr = clGetCommandQueueInfo(
989 cast<cl_command_queue>(queue), cast<cl_command_queue_info>(param_name),
990 param_value_size, param_value, param_value_size_ret);
991 if (CLErr != CL_SUCCESS) {
992 return cast<pi_result>(CLErr);
1003 (void)NativeHandleDesc;
1004 (void)ownNativeHandle;
1006 assert(piQueue !=
nullptr);
1007 *piQueue =
reinterpret_cast<pi_queue>(nativeHandle);
1008 clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
1016 clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
1017 sizeof(
cl_uint), &deviceCount,
nullptr);
1019 std::vector<cl_device_id> devicesInCtx(deviceCount);
1021 if (ret_err != CL_SUCCESS || deviceCount < 1) {
1022 if (res_program !=
nullptr)
1023 *res_program =
nullptr;
1024 return cast<pi_result>(CL_INVALID_CONTEXT);
1027 ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
1028 deviceCount *
sizeof(cl_device_id),
1029 devicesInCtx.data(),
nullptr);
1033 cl_platform_id curPlatform;
1034 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
1035 sizeof(cl_platform_id), &curPlatform,
nullptr);
1048 for (cl_device_id dev : devicesInCtx) {
1058 bool supported =
false;
1064 return cast<pi_result>(CL_INVALID_OPERATION);
1067 if (res_program !=
nullptr)
1068 *res_program = cast<pi_program>(clCreateProgramWithIL(
1069 cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
1076 for (cl_device_id dev : devicesInCtx) {
1077 bool supported =
false;
1083 return cast<pi_result>(CL_INVALID_OPERATION);
1087 cl_program(CL_API_CALL *)(cl_context,
const void *, size_t,
cl_int *);
1089 reinterpret_cast<apiFuncT
>(clGetExtensionFunctionAddressForPlatform(
1090 curPlatform,
"clCreateProgramWithILKHR"));
1092 assert(funcPtr !=
nullptr);
1093 if (res_program !=
nullptr)
1094 *res_program = cast<pi_program>(
1095 funcPtr(cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
1097 err = PI_ERROR_INVALID_VALUE;
1105 assert(piProgram !=
nullptr);
1106 *piProgram =
reinterpret_cast<pi_program>(nativeHandle);
1120 for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
1123 normalizedCoords =
static_cast<pi_bool>(sampler_properties[++i]);
1130 assert(
false &&
"Cannot recognize sampler property");
1135 *result_sampler = cast<pi_sampler>(
1136 clCreateSampler(cast<cl_context>(context), normalizedCoords,
1137 addressingMode, filterMode, cast<cl_int *>(&error_code)));
1143 const pi_mem *arg_value) {
1144 std::ignore = arg_properties;
1145 return cast<pi_result>(
1146 clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
1147 sizeof(arg_value), cast<const cl_mem *>(arg_value)));
1152 return cast<pi_result>(
1153 clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
1154 sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
1160 assert(piKernel !=
nullptr);
1161 *piKernel =
reinterpret_cast<pi_kernel>(nativeHandle);
1170 const std::string &sub_str) {
1173 for (
const auto &
x : str) {
1174 if (
x == delimiter) {
1175 if (str.substr(beg, length) == sub_str)
1185 if (str.substr(beg, length) == sub_str)
1192 const char *func_name,
1195 cl_context CLContext =
nullptr;
1197 clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
1198 sizeof(CLContext), &CLContext,
nullptr);
1200 if (ret_err != CL_SUCCESS)
1201 return cast<pi_result>(ret_err);
1204 ret_err = getExtFuncFromContext<clGetDeviceFunctionPointer_fn>(
1215 *function_pointer_ret = 0;
1220 if (Res != CL_SUCCESS)
1221 return cast<pi_result>(Res);
1223 std::string ClResult(Size,
' ');
1226 ClResult.size(), &ClResult[0],
nullptr);
1227 if (Res != CL_SUCCESS)
1228 return cast<pi_result>(Res);
1233 ClResult.pop_back();
1235 return PI_ERROR_INVALID_KERNEL_NAME;
1237 pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1241 pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
1242 cast<cl_program>(program), func_name,
1243 function_pointer_ret));
1247 if (pi_ret_err == CL_INVALID_ARG_VALUE) {
1248 *function_pointer_ret = 0;
1249 return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1257 void (*pfn_notify)(
const char *errinfo,
1258 const void *private_info,
1259 size_t cb,
void *user_data1),
1261 pi_result ret = PI_ERROR_INVALID_OPERATION;
1262 *retcontext = cast<pi_context>(
1263 clCreateContext(properties, cast<cl_uint>(num_devices),
1264 cast<const cl_device_id *>(devices), pfn_notify,
1265 user_data, cast<cl_int *>(&ret)));
1273 bool ownNativeHandle,
1277 (void)ownNativeHandle;
1278 assert(piContext !=
nullptr);
1279 assert(ownNativeHandle ==
false);
1280 *piContext =
reinterpret_cast<pi_context>(nativeHandle);
1285 size_t paramValueSize,
void *paramValue,
1286 size_t *paramValueSizeRet) {
1287 switch (paramName) {
1303 PI_ERROR_INVALID_ARG_VALUE);
1304 return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
1307 cl_int result = clGetContextInfo(
1308 cast<cl_context>(context), cast<cl_context_info>(paramName),
1309 paramValueSize, paramValue, paramValueSizeRet);
1317 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1321 clCreateBufferWithPropertiesINTEL_fn FuncPtr =
nullptr;
1322 cl_context CLContext = cast<cl_context>(context);
1324 ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesINTEL_fn>(
1329 cast<pi_mem>(FuncPtr(CLContext, properties, cast<cl_mem_flags>(flags),
1330 size,
host_ptr, cast<cl_int *>(&ret_err)));
1335 *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
1336 cast<cl_mem_flags>(flags), size,
1337 host_ptr, cast<cl_int *>(&ret_err)));
1345 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1346 *ret_mem = cast<pi_mem>(
1347 clCreateImage(cast<cl_context>(context), cast<cl_mem_flags>(flags),
1349 cast<const cl_image_desc *>(image_desc),
host_ptr,
1350 cast<cl_int *>(&ret_err)));
1357 void *buffer_create_info,
pi_mem *ret_mem) {
1359 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1360 *ret_mem = cast<pi_mem>(
1361 clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
1362 cast<cl_buffer_create_type>(buffer_create_type),
1363 buffer_create_info, cast<cl_int *>(&ret_err)));
1369 bool ownNativeHandle,
pi_mem *piMem) {
1371 (void)ownNativeHandle;
1372 assert(piMem !=
nullptr);
1373 *piMem =
reinterpret_cast<pi_mem>(nativeHandle);
1382 (void)ownNativeHandle;
1385 assert(Img !=
nullptr);
1386 *Img =
reinterpret_cast<pi_mem>(nativeHandle);
1392 const size_t *lengths,
const unsigned char **binaries,
1396 (void)num_metadata_entries;
1398 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1399 *ret_program = cast<pi_program>(clCreateProgramWithBinary(
1400 cast<cl_context>(context), cast<cl_uint>(num_devices),
1401 cast<const cl_device_id *>(device_list), lengths, binaries,
1402 cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
1407 const pi_device *device_list,
const char *options,
1410 void (*pfn_notify)(
pi_program program,
void *user_data),
1413 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1414 *ret_program = cast<pi_program>(
1415 clLinkProgram(cast<cl_context>(context), cast<cl_uint>(num_devices),
1416 cast<const cl_device_id *>(device_list), options,
1417 cast<cl_uint>(num_input_programs),
1418 cast<const cl_program *>(input_programs),
1419 cast<
void (*)(cl_program,
void *)>(pfn_notify), user_data,
1420 cast<cl_int *>(&ret_err)));
1427 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1428 *ret_kernel = cast<pi_kernel>(clCreateKernel(
1429 cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
1435 size_t param_value_size,
void *param_value,
1436 size_t *param_value_size_ret) {
1437 if (kernel ==
nullptr) {
1438 return PI_ERROR_INVALID_KERNEL;
1441 switch (param_name) {
1443 return PI_ERROR_INVALID_VALUE;
1445 cl_int result = clGetKernelWorkGroupInfo(
1446 cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1447 cast<cl_kernel_work_group_info>(param_name), param_value_size,
1448 param_value, param_value_size_ret);
1455 size_t input_value_size,
1456 const void *input_value,
1457 size_t param_value_size,
void *param_value,
1458 size_t *param_value_size_ret) {
1459 (void)param_value_size;
1463 std::shared_ptr<void> implicit_input_value;
1472 if (pi_ret_err != PI_SUCCESS)
1474 std::shared_ptr<size_t[]> WGSizes{
new size_t[max_dims]};
1477 max_dims *
sizeof(
size_t), WGSizes.get(),
nullptr);
1478 if (pi_ret_err != PI_SUCCESS)
1480 for (
size_t i = 1; i < max_dims; ++i)
1481 WGSizes.get()[i] = 1;
1482 implicit_input_value = std::move(WGSizes);
1483 input_value_size = max_dims *
sizeof(size_t);
1484 input_value = implicit_input_value.get();
1487 ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
1488 cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1489 cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
1490 sizeof(
size_t), &ret_val, param_value_size_ret));
1492 if (ret_err == CL_INVALID_OPERATION) {
1498 ret_err = CL_SUCCESS;
1501 ret_err = CL_SUCCESS;
1504 size_t result_size = 0;
1510 nullptr, &result_size);
1511 if (pi_ret_err != PI_SUCCESS) {
1514 assert(result_size %
sizeof(
size_t) == 0);
1515 std::vector<size_t> result(result_size /
sizeof(
size_t));
1517 result_size, result.data(),
nullptr);
1518 if (pi_ret_err != PI_SUCCESS) {
1521 ret_val = *std::max_element(result.begin(), result.end());
1522 ret_err = CL_SUCCESS;
1525 ret_err = CL_SUCCESS;
1529 if (ret_err != CL_SUCCESS)
1530 return cast<pi_result>(ret_err);
1532 *(
static_cast<uint32_t *
>(param_value)) =
static_cast<uint32_t
>(ret_val);
1533 if (param_value_size_ret)
1534 *param_value_size_ret =
sizeof(uint32_t);
1540 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1541 auto *cl_err = cast<cl_int *>(&ret_err);
1543 cl_event e = clCreateUserEvent(cast<cl_context>(context), cl_err);
1544 *ret_event = cast<pi_event>(e);
1545 if (*cl_err != CL_SUCCESS)
1547 *cl_err = clSetUserEventStatus(e, CL_COMPLETE);
1553 bool ownNativeHandle,
1557 (void)ownNativeHandle;
1559 assert(piEvent !=
nullptr);
1560 assert(nativeHandle);
1563 *piEvent =
reinterpret_cast<pi_event>(nativeHandle);
1569 size_t offset,
size_t size,
1574 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1575 *ret_map = cast<void *>(clEnqueueMapBuffer(
1576 cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
1577 cast<cl_bool>(blocking_map), map_flags, offset, size,
1578 cast<cl_uint>(num_events_in_wait_list),
1579 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event),
1580 cast<cl_int *>(&ret_err)));
1599 void *Ptr =
nullptr;
1600 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1603 clHostMemAllocINTEL_fn FuncPtr =
nullptr;
1604 cl_context CLContext = cast<cl_context>(context);
1605 RetVal = getExtFuncFromContext<clHostMemAllocINTEL_fn>(
1610 Ptr = FuncPtr(CLContext, cast<cl_mem_properties_intel *>(properties), size,
1617 if (RetVal == PI_SUCCESS &&
alignment != 0)
1618 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0 &&
1619 "allocation not aligned correctly");
1637 void *Ptr =
nullptr;
1638 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1641 clDeviceMemAllocINTEL_fn FuncPtr =
nullptr;
1642 cl_context CLContext = cast<cl_context>(context);
1643 RetVal = getExtFuncFromContext<clDeviceMemAllocINTEL_fn>(
1648 Ptr = FuncPtr(CLContext, cast<cl_device_id>(device),
1649 cast<cl_mem_properties_intel *>(properties), size,
alignment,
1650 cast<cl_int *>(&RetVal));
1656 if (RetVal == PI_SUCCESS &&
alignment != 0)
1657 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0 &&
1658 "allocation not aligned correctly");
1676 void *Ptr =
nullptr;
1677 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1680 clSharedMemAllocINTEL_fn FuncPtr =
nullptr;
1681 cl_context CLContext = cast<cl_context>(context);
1682 RetVal = getExtFuncFromContext<clSharedMemAllocINTEL_fn>(
1687 Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1688 cast<cl_mem_properties_intel *>(properties), size,
alignment,
1689 cast<cl_int *>(&RetVal));
1695 (RetVal == PI_SUCCESS &&
1696 reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0));
1707 clMemBlockingFreeINTEL_fn FuncPtr =
nullptr;
1709 cl_context CLContext = cast<cl_context>(context);
1710 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1711 RetVal = getExtFuncFromContext<clMemBlockingFreeINTEL_fn>(
1716 RetVal = cast<pi_result>(FuncPtr(CLContext, ptr));
1730 size_t arg_size,
const void *arg_value) {
1736 cl_context CLContext;
1737 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
1738 sizeof(cl_context), &CLContext,
nullptr);
1739 if (CLErr != CL_SUCCESS) {
1740 return cast<pi_result>(CLErr);
1743 clSetKernelArgMemPointerINTEL_fn FuncPtr =
nullptr;
1744 pi_result RetVal = getExtFuncFromContext<clSetKernelArgMemPointerINTEL_fn>(
1751 auto PtrToPtr =
reinterpret_cast<const intptr_t *
>(arg_value);
1752 auto DerefPtr =
reinterpret_cast<void *
>(*PtrToPtr);
1754 cast<pi_result>(FuncPtr(cast<cl_kernel>(kernel), arg_index, DerefPtr));
1771 size_t count,
pi_uint32 num_events_in_waitlist,
1776 cl_context CLContext;
1778 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1779 sizeof(cl_context), &CLContext,
nullptr);
1780 if (CLErr != CL_SUCCESS) {
1781 return cast<pi_result>(CLErr);
1784 clEnqueueMemFillINTEL_fn FuncPtr =
nullptr;
1785 pi_result RetVal = getExtFuncFromContext<clEnqueueMemFillINTEL_fn>(
1790 RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue), ptr, &value,
1791 1, count, num_events_in_waitlist,
1792 cast<const cl_event *>(events_waitlist),
1793 cast<cl_event *>(event)));
1810 const void *src_ptr,
size_t size,
1816 cl_context CLContext;
1818 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1819 sizeof(cl_context), &CLContext,
nullptr);
1820 if (CLErr != CL_SUCCESS) {
1821 return cast<pi_result>(CLErr);
1824 clEnqueueMemcpyINTEL_fn FuncPtr =
nullptr;
1825 pi_result RetVal = getExtFuncFromContext<clEnqueueMemcpyINTEL_fn>(
1830 RetVal = cast<pi_result>(
1831 FuncPtr(cast<cl_command_queue>(queue), blocking, dst_ptr, src_ptr, size,
1832 num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1833 cast<cl_event *>(event)));
1858 return PI_ERROR_INVALID_VALUE;
1860 return cast<pi_result>(clEnqueueMarkerWithWaitList(
1861 cast<cl_command_queue>(queue), num_events_in_waitlist,
1862 cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
1905 return cast<pi_result>(
1906 clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0,
nullptr,
1907 reinterpret_cast<cl_event *
>(event)));
1950 size_t pitch,
size_t pattern_size,
1951 const void *pattern,
size_t width,
1956 std::ignore = queue;
1958 std::ignore = pitch;
1959 std::ignore = pattern_size;
1960 std::ignore = pattern;
1961 std::ignore = width;
1962 std::ignore = height;
1963 std::ignore = num_events_in_waitlist;
1964 std::ignore = events_waitlist;
1965 std::ignore = event;
1966 return PI_ERROR_INVALID_OPERATION;
1981 pi_queue queue,
void *ptr,
size_t pitch,
int value,
size_t width,
1982 size_t height,
pi_uint32 num_events_in_waitlist,
1984 std::ignore = queue;
1986 std::ignore = pitch;
1987 std::ignore = value;
1988 std::ignore = width;
1989 std::ignore = height;
1990 std::ignore = num_events_in_waitlist;
1991 std::ignore = events_waitlist;
1992 std::ignore = event;
1993 return PI_ERROR_INVALID_OPERATION;
2012 const void *src_ptr,
size_t src_pitch,
size_t width,
size_t height,
2015 std::ignore = queue;
2016 std::ignore = blocking;
2017 std::ignore = dst_ptr;
2018 std::ignore = dst_pitch;
2019 std::ignore = src_ptr;
2020 std::ignore = src_pitch;
2021 std::ignore = width;
2022 std::ignore = height;
2023 std::ignore = num_events_in_waitlist;
2024 std::ignore = events_waitlist;
2025 std::ignore = event;
2026 return PI_ERROR_INVALID_OPERATION;
2047 size_t param_value_size,
void *param_value,
2048 size_t *param_value_size_ret) {
2050 clGetMemAllocInfoINTEL_fn FuncPtr =
nullptr;
2051 cl_context CLContext = cast<cl_context>(context);
2052 pi_result RetVal = getExtFuncFromContext<clGetMemAllocInfoINTEL_fn>(
2057 RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr, param_name,
2058 param_value_size, param_value,
2059 param_value_size_ret));
2068 std::ignore = context;
2074 std::ignore = context;
2093 pi_bool blocking_write,
size_t count,
size_t offset,
const void *src,
2096 cl_context Ctx =
nullptr;
2098 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2099 sizeof(Ctx), &Ctx,
nullptr);
2101 if (Res != CL_SUCCESS)
2102 return cast<pi_result>(Res);
2105 Res = getExtFuncFromContext<decltype(F)>(
2109 if (!F || Res != CL_SUCCESS)
2110 return PI_ERROR_INVALID_OPERATION;
2111 Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
2112 blocking_write, count, offset, src, num_events_in_wait_list,
2113 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
2114 return cast<pi_result>(Res);
2132 size_t count,
size_t offset,
void *dst,
pi_uint32 num_events_in_wait_list,
2134 cl_context Ctx =
nullptr;
2136 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2137 sizeof(Ctx), &Ctx,
nullptr);
2139 if (Res != CL_SUCCESS)
2140 return cast<pi_result>(Res);
2143 Res = getExtFuncFromContext<decltype(F)>(
2147 if (!F || Res != CL_SUCCESS)
2148 return PI_ERROR_INVALID_OPERATION;
2149 Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
2150 blocking_read, count, offset, dst, num_events_in_wait_list,
2151 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
2152 return cast<pi_result>(Res);
2156 const char *pipe_symbol,
pi_bool blocking,
2157 void *ptr,
size_t size,
2161 cl_context CLContext;
2163 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2164 sizeof(cl_context), &CLContext,
nullptr);
2165 if (CLErr != CL_SUCCESS) {
2166 return cast<pi_result>(CLErr);
2169 clEnqueueReadHostPipeINTEL_fn FuncPtr =
nullptr;
2170 pi_result RetVal = getExtFuncFromContext<clEnqueueReadHostPipeINTEL_fn>(
2175 RetVal = cast<pi_result>(FuncPtr(
2176 cast<cl_command_queue>(queue), cast<cl_program>(program), pipe_symbol,
2177 blocking, ptr, size, num_events_in_waitlist,
2178 cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
2185 const char *pipe_symbol,
pi_bool blocking,
2186 void *ptr,
size_t size,
2190 cl_context CLContext;
2192 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2193 sizeof(cl_context), &CLContext,
nullptr);
2194 if (CLErr != CL_SUCCESS) {
2195 return cast<pi_result>(CLErr);
2198 clEnqueueWriteHostPipeINTEL_fn FuncPtr =
nullptr;
2199 pi_result RetVal = getExtFuncFromContext<clEnqueueWriteHostPipeINTEL_fn>(
2204 RetVal = cast<pi_result>(FuncPtr(
2205 cast<cl_command_queue>(queue), cast<cl_program>(program), pipe_symbol,
2206 blocking, ptr, size, num_events_in_waitlist,
2207 cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
2225 size_t param_value_size,
2226 const void *param_value) {
2231 return cast<pi_result>(clSetKernelExecInfo(
2232 cast<cl_kernel>(kernel), param_name, param_value_size, param_value));
2239 const void *spec_value) {
2240 cl_program ClProg = cast<cl_program>(prog);
2241 cl_context Ctx =
nullptr;
2244 clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT,
sizeof(Ctx), &Ctx, &RetSize);
2246 if (Res != CL_SUCCESS)
2247 return cast<pi_result>(Res);
2250 Res = getExtFuncFromContext<decltype(F)>(
2254 if (!F || Res != CL_SUCCESS)
2255 return PI_ERROR_INVALID_OPERATION;
2256 Res = F(ClProg, spec_id, spec_size, spec_value);
2257 return cast<pi_result>(Res);
2268 assert(nativeHandle !=
nullptr);
2290 int32_t *nativeHandleDesc) {
2291 *nativeHandleDesc = 0;
2316 (void)ret_command_buffer;
2319 return PI_ERROR_INVALID_OPERATION;
2323 (void)command_buffer;
2326 return PI_ERROR_INVALID_OPERATION;
2330 (void)command_buffer;
2333 return PI_ERROR_INVALID_OPERATION;
2337 (void)command_buffer;
2340 return PI_ERROR_INVALID_OPERATION;
2345 const size_t *global_work_offset,
const size_t *global_work_size,
2346 const size_t *local_work_size,
pi_uint32 num_sync_points_in_wait_list,
2349 (void)command_buffer;
2352 (void)global_work_offset;
2353 (void)global_work_size;
2354 (void)local_work_size;
2355 (void)num_sync_points_in_wait_list;
2356 (void)sync_point_wait_list;
2360 return PI_ERROR_INVALID_OPERATION;
2365 const void *src_ptr,
size_t size,
2369 (void)command_buffer;
2373 (void)num_sync_points_in_wait_list;
2374 (void)sync_point_wait_list;
2378 return PI_ERROR_INVALID_OPERATION;
2383 size_t src_offset,
size_t dst_offset,
size_t size,
2387 (void)command_buffer;
2393 (void)num_sync_points_in_wait_list;
2394 (void)sync_point_wait_list;
2398 return PI_ERROR_INVALID_OPERATION;
2405 size_t dst_row_pitch,
size_t dst_slice_pitch,
2409 (void)command_buffer;
2415 (void)src_row_pitch;
2416 (void)src_slice_pitch;
2417 (void)dst_row_pitch;
2418 (void)dst_slice_pitch;
2419 (void)num_sync_points_in_wait_list;
2420 (void)sync_point_wait_list;
2424 return PI_ERROR_INVALID_OPERATION;
2429 size_t size,
void *dst,
pi_uint32 num_sync_points_in_wait_list,
2432 (void)command_buffer;
2437 (void)num_sync_points_in_wait_list;
2438 (void)sync_point_wait_list;
2442 return PI_ERROR_INVALID_OPERATION;
2449 size_t buffer_slice_pitch,
size_t host_row_pitch,
size_t host_slice_pitch,
2450 void *ptr,
pi_uint32 num_sync_points_in_wait_list,
2453 (void)command_buffer;
2455 (void)buffer_offset;
2458 (void)buffer_row_pitch;
2459 (void)buffer_slice_pitch;
2460 (void)host_row_pitch;
2461 (void)host_slice_pitch;
2463 (void)num_sync_points_in_wait_list;
2464 (void)sync_point_wait_list;
2468 return PI_ERROR_INVALID_OPERATION;
2473 size_t size,
const void *ptr,
pi_uint32 num_sync_points_in_wait_list,
2476 (void)command_buffer;
2481 (void)num_sync_points_in_wait_list;
2482 (void)sync_point_wait_list;
2486 return PI_ERROR_INVALID_OPERATION;
2493 size_t buffer_slice_pitch,
size_t host_row_pitch,
size_t host_slice_pitch,
2494 const void *ptr,
pi_uint32 num_sync_points_in_wait_list,
2497 (void)command_buffer;
2499 (void)buffer_offset;
2502 (void)buffer_row_pitch;
2503 (void)buffer_slice_pitch;
2504 (void)host_row_pitch;
2505 (void)host_slice_pitch;
2507 (void)num_sync_points_in_wait_list;
2508 (void)sync_point_wait_list;
2512 return PI_ERROR_INVALID_OPERATION;
2520 (void)command_buffer;
2522 (void)num_events_in_wait_list;
2523 (void)event_wait_list;
2527 return PI_ERROR_INVALID_OPERATION;
2538 (void)PluginParameter;
2545 uint64_t *HostTime) {
2547 cl_platform_id platform;
2548 cl_device_id deviceID = cast<cl_device_id>(Device);
2551 auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
2552 sizeof(cl_platform_id), &platform,
nullptr);
2553 if (ret_err != CL_SUCCESS) {
2554 return cast<pi_result>(ret_err);
2559 if (ret_err != CL_SUCCESS) {
2560 return cast<pi_result>(ret_err);
2567 "OpenCL version for device and/or platform is less than 2.1",
2568 PI_ERROR_INVALID_OPERATION);
2569 return PI_ERROR_INVALID_OPERATION;
2574 clGetDeviceAndHostTimer(deviceID, DeviceTime,
2575 HostTime ==
nullptr ? &dummy : HostTime);
2577 }
else if (HostTime) {
2578 clGetHostTimer(deviceID, HostTime);
2585 size_t param_value_size,
void *param_value,
2586 size_t *param_value_size_ret) {
2588 clGetEventInfo(
reinterpret_cast<cl_event
>(event), param_name,
2589 param_value_size, param_value, param_value_size_ret);
2590 if (result == CL_SUCCESS && param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
2594 const auto param_value_int =
static_cast<cl_int *
>(param_value);
2595 if (*param_value_int == CL_QUEUED) {
2596 *param_value_int = CL_SUBMITTED;
2609 size_t PluginVersionSize =
sizeof(PluginInit->
PluginVersion);
2611 return PI_ERROR_INVALID_VALUE;
2614 #define _PI_CL(pi_api, ocl_api) \
2615 (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
2773 #define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll"
2774 #include "../common_win_pi_trace/common_win_pi_trace.hpp"
2775 #undef __SYCL_PLUGIN_DLL_NAME
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data1), void *user_data, pi_context *retcontext)
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
FuncPtrCache< clEnqueueMemcpyINTEL_fn > clEnqueueMemcpyINTELCache
pi_result piEventRelease(pi_event event)
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextPlatformGetNativeHandle(pi_platform platform, pi_native_handle *nativeHandle)
Gets the native handle of a PI platform object.
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
CONSTFIX char clGetMemAllocInfoName[]
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem_obj_property *arg_properties, const pi_mem *arg_value)
FuncPtrCache< clHostMemAllocINTEL_fn > clHostMemAllocINTELCache
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value) clSetProgramSpecializationConstant_fn
pi_result piPluginInit(pi_plugin *PluginInit)
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, bool, pi_program *piProgram)
Creates PI program object from a native handle.
@ PI_DEVICE_INFO_GPU_EU_COUNT
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
FuncPtrCache< clEnqueueWriteHostPipeINTEL_fn > clEnqueueWriteHostPipeINTELCache
pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to submit the command-buffer to queue for execution, returns an error if the command-buffer is no...
std::map< cl_context, T > Map
@ PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret)
Retrieves a device function pointer to a user-defined function.
CONSTFIX char clCreateBufferWithPropertiesName[]
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
@ PI_DEVICE_INFO_IMAGE_SRGB
pi_result piProgramRetain(pi_program program)
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
@ PI_KERNEL_COMPILE_NUM_SUB_GROUPS
static bool is_in_separated_string(const std::string &str, char delimiter, const std::string &sub_str)
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
CONSTFIX char clSharedMemAllocName[]
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
FuncPtrCache< clEnqueueWriteGlobalVariable_fn > clEnqueueWriteGlobalVariableCache
pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
void memcpy(void *Dst, const void *Src, size_t Size)
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
intptr_t pi_context_properties
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_SAMPLER_INFO_FILTER_MODE
pi_result piextEnqueueReadHostPipe(pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Read from pipe of a given name.
constexpr alignment_key::value_t< K > alignment
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piEventCreate(pi_context context, pi_event *ret_event)
Create PI event object in a signalled/completed state.
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
@ PI_DEVICE_INFO_GPU_SLICES
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
pi_result piDeviceRetain(pi_device device)
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
pi_result piQueueRelease(pi_queue command_queue)
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *piMem)
Creates PI mem object from a native handle.
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
FuncPtrCache< clSetProgramSpecializationConstant_fn > clSetProgramSpecializationConstantCache
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API for writing data from host to a device global variable.
pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime)
Queries device for its global timestamp in nanoseconds, and updates HostTime with the value of the h...
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *piEvent)
Creates PI event object from a native handle.
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
const OpenCLVersion V2_1(2, 1)
_pi_sampler_addressing_mode
pi_result piKernelRelease(pi_kernel kernel)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version)
static ExtFuncPtrCacheT * ExtFuncPtrCache
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
const char SupportedVersion[]
pi_result piextUSMRelease(const void *ptr, pi_context context)
Release host system memory from USM.
constexpr size_t MaxMessageSize
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
multi_ptr< ElementType, access::address_space::ext_intel_global_host_space, IsDecorated > host_ptr
FuncPtrCache< clSetKernelArgMemPointerINTEL_fn > clSetKernelArgMemPointerINTELCache
@ PI_SAMPLER_FILTER_MODE_NEAREST
pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer)
API to increment the reference count of the command-buffer.
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
Creates PI context object from a native handle.
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
CONSTFIX char clEnqueueReadGlobalVariableName[]
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform, pi_device *piDevice)
Creates PI device object from a native handle.
FuncPtrCache< clGetMemAllocInfoINTEL_fn > clGetMemAllocInfoINTELCache
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
CONSTFIX char clMemBlockingFreeName[]
CONSTFIX char clGetDeviceFunctionPointerName[]
FuncPtrCache< clSharedMemAllocINTEL_fn > clSharedMemAllocINTELCache
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform *platform)
Creates PI platform object from a native handle.
pi_result piextCommandBufferNDRangeKernel(pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a kernel execution command to the command-buffer.
FuncPtrCache< clEnqueueReadHostPipeINTEL_fn > clEnqueueReadHostPipeINTELCache
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
@ PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS
pi_result piContextRelease(pi_context context)
pi_result piProgramRelease(pi_program program)
static cl_int checkDeviceExtensions(cl_device_id dev, const std::vector< std::string > &exts, bool &supported)
const OpenCLVersion V2_0(2, 0)
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemRelease(pi_mem mem)
This struct is a record of the device binary information.
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piContextGetInfo(pi_context context, pi_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Returns requested info for provided native device. Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
pi_result piextCommandBufferCreate(pi_context context, pi_device device, const pi_ext_command_buffer_desc *desc, pi_ext_command_buffer *ret_command_buffer)
API to create a command-buffer.
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM
static pi_result piextGetNativeHandle(void *piObj, pi_native_handle *nativeHandle)
Common API for getting the native handle of a PI object.
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
CONSTFIX char clDeviceMemAllocName[]
thread_local pi_result ErrorMessageCode
@ PI_KERNEL_GROUP_INFO_NUM_REGS
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP
pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
@ PI_KERNEL_MAX_SUB_GROUP_SIZE
thread_local char ErrorMessage[MaxMessageSize]
pi_result piQueueRetain(pi_queue command_queue)
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextMemImageCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, pi_mem *Img)
Creates PI image object from a native handle.
pi_result piKernelRetain(pi_kernel kernel)
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
#define _PI_OPENCL_PLUGIN_VERSION_STRING
pi_uint32 pi_ext_sync_point
pi_result piextUSMImport(const void *ptr, size_t size, pi_context context)
Import host system memory into USM.
pi_result piSamplerRetain(pi_sampler sampler)
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
pi_bitfield pi_memory_scope_capabilities
FuncPtrCache< clGetDeviceFunctionPointer_fn > clGetDeviceFunctionPointerCache
pi_bitfield pi_mem_properties
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT
pi_result piDeviceRelease(pi_device device)
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, const char *FuncName, cl_ulong *ret_ptr) clGetDeviceFunctionPointer_fn
pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer)
API to decrement the reference count of the command-buffer.
@ PI_KERNEL_MAX_NUM_SUB_GROUPS
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
@ PI_DEVICE_INFO_PCI_ADDRESS
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
uintptr_t pi_native_handle
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
pi_result piQueueFinish(pi_queue command_queue)
pi_result piContextRetain(pi_context context)
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
pi_bitfield pi_sampler_properties
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
CONSTFIX char clEnqueueWriteGlobalVariableName[]
FuncPtrCache< clDeviceMemAllocINTEL_fn > clDeviceMemAllocINTELCache
CONSTFIX char clHostMemAllocName[]
CONSTFIX char clSetKernelArgMemPointerName[]
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_PROGRAM_INFO_KERNEL_NAMES
static void setErrorMessage(const char *message, pi_result error_code)
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Fill API.
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc)
Gets the native handle of a PI queue object.
static pi_result USMSetIndirectAccess(pi_kernel kernel)
Enables indirect access of pointers in kernels.
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION)
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piMemRetain(pi_mem mem)
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers. Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
pi_bitfield pi_queue_properties
@ PI_DEVICE_INFO_ATOMIC_64
@ PI_DEVICE_INFO_BACKEND_VERSION
pi_result piEventRetain(pi_event event)
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *) clEnqueueReadGlobalVariable_fn
@ PI_EXT_PLATFORM_INFO_BACKEND
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, int32_t NativeHandleDesc, pi_context, pi_device, bool ownNativeHandle, pi_queue_properties *Properties, pi_queue *piQueue)
Creates PI queue object from a native handle.
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
static cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version)
CONSTFIX char clEnqueueWriteHostPipeName[]
@ PI_EXT_PLATFORM_BACKEND_OPENCL
The backend is OpenCL.
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, const char **backend_option)
API to get backend specific option.
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
@ PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_program, bool, pi_kernel *piKernel)
Creates PI kernel object from a native handle.
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) clEnqueueWriteGlobalVariable_fn
CONSTFIX char clEnqueueMemFillName[]
@ PI_DEVICE_INFO_VENDOR_ID
pi_result piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Write to pipe of a given name.
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
pi_result piQueueFlush(pi_queue command_queue)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
CONSTFIX char clSetProgramSpecializationConstantName[]
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer)
API to stop command-buffer recording such that no more commands can be appended, and makes the comman...
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
@ PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT
#define _PI_CL(pi_api, ocl_api)
FuncPtrCache< clEnqueueReadGlobalVariable_fn > clEnqueueReadGlobalVariableCache
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
@ PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
const OpenCLVersion V3_0(3, 0)
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Sets up pointer arguments for CL kernels.
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
FuncPtrCache< clEnqueueMemFillINTEL_fn > clEnqueueMemFillINTELCache
FuncPtrCache< clCreateBufferWithPropertiesINTEL_fn > clCreateBufferWithPropertiesINTELCache
pi_result piSamplerRelease(pi_sampler sampler)
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_bitfield pi_memory_order_capabilities
@ PI_SAMPLER_INFO_NORMALIZED_COORDS
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
pi_bitfield pi_usm_mem_properties
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific 64-bit OpenCL devices.
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
CONSTFIX char clEnqueueReadHostPipeName[]
static pi_result getExtFuncFromContext(cl_context context, FuncPtrCache< T > &FPtrCache, const char *FuncName, T *fptr)
FuncPtrCache< clMemBlockingFreeINTEL_fn > clMemBlockingFreeINTELCache
@ PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue)
CONSTFIX char clEnqueueMemcpyName[]
pi_result piextUSMFree(pi_context context, void *ptr)
Frees allocated USM memory in a blocking manner.
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
constexpr pi_queue_properties PI_QUEUE_FLAGS
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
_pi_kernel_sub_group_info
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
@ PI_SAMPLER_INFO_ADDRESSING_MODE
@ PI_SAMPLER_ADDRESSING_MODE_CLAMP