// NOTE(review): presumably this opts in to OpenCL entry points that later
// OpenCL versions mark as deprecated (this file calls clCreateCommandQueue and
// clCreateSampler) — confirm against the OpenCL headers in use, and keep the
// define ahead of their inclusion.
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
34 #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \
35 if (err != CL_SUCCESS) { \
38 return cast<pi_result>(reterr); \
42 template <
class To,
class From> To
cast(From value) {
44 static_assert(
sizeof(From) ==
sizeof(To),
"cast failed size check");
// CONSTFIX selects the qualifier for constants declared with it: GCC releases
// older than 7.2 get `constexpr`, the other definition is plain `const`.
// Both halves of the version test must spell the macro `__GNUC__`: an
// identifier that is not defined evaluates to 0 inside #if, so a misspelling
// silently makes the "GCC 7.0 / 7.1" half of the condition always false.
// NOTE(review): presumably the workaround exists because these constants are
// used as `const char *` template arguments — confirm.
49 #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2))
50 #define CONSTFIX constexpr
52 #define CONSTFIX const
61 "clCreateBufferWithPropertiesINTEL";
67 "clSetProgramSpecializationConstant";
69 "clGetDeviceFunctionPointerINTEL";
71 "clEnqueueWriteGlobalVariableINTEL";
73 "clEnqueueReadGlobalVariableINTEL";
98 cl_int ret_err = CL_INVALID_VALUE;
100 size_t platVerSize = 0;
102 clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0,
nullptr, &platVerSize);
104 std::string platVer(platVerSize,
'\0');
105 ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize,
106 platVer.data(),
nullptr);
108 if (ret_err != CL_SUCCESS)
113 return CL_INVALID_PLATFORM;
119 cl_int ret_err = CL_INVALID_VALUE;
121 size_t devVerSize = 0;
122 ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0,
nullptr, &devVerSize);
124 std::string devVer(devVerSize,
'\0');
125 ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(),
128 if (ret_err != CL_SUCCESS)
133 return CL_INVALID_DEVICE;
139 const std::vector<std::string> &exts,
141 cl_int ret_err = CL_INVALID_VALUE;
144 ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0,
nullptr, &extSize);
146 std::string extStr(extSize,
'\0');
147 ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(),
150 if (ret_err != CL_SUCCESS)
154 for (
const std::string &ext : exts)
155 if (!(supported = (extStr.find(ext) != std::string::npos)))
162 template <const
char *FuncName,
typename T>
166 thread_local
static std::map<pi_context, T> FuncPtrs;
169 auto It = FuncPtrs.find(context);
170 if (It != FuncPtrs.end()) {
175 return F ? PI_SUCCESS : PI_ERROR_INVALID_VALUE;
180 clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
181 sizeof(
cl_uint), &deviceCount,
nullptr);
183 if (ret_err != CL_SUCCESS || deviceCount < 1) {
184 return PI_ERROR_INVALID_CONTEXT;
187 std::vector<cl_device_id> devicesInCtx(deviceCount);
188 ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
189 deviceCount *
sizeof(cl_device_id),
190 devicesInCtx.data(),
nullptr);
192 if (ret_err != CL_SUCCESS) {
193 return PI_ERROR_INVALID_CONTEXT;
196 cl_platform_id curPlatform;
197 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
198 sizeof(cl_platform_id), &curPlatform,
nullptr);
200 if (ret_err != CL_SUCCESS) {
201 return PI_ERROR_INVALID_CONTEXT;
205 (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
209 FuncPtrs[context] =
nullptr;
210 return PI_ERROR_INVALID_VALUE;
214 FuncPtrs[context] = FuncPtr;
216 return cast<pi_result>(ret_err);
227 clHostMemAllocINTEL_fn HFunc =
nullptr;
228 clSharedMemAllocINTEL_fn SFunc =
nullptr;
229 clDeviceMemAllocINTEL_fn DFunc =
nullptr;
230 cl_context CLContext;
231 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
232 sizeof(cl_context), &CLContext,
nullptr);
233 if (CLErr != CL_SUCCESS) {
234 return cast<pi_result>(CLErr);
237 getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
238 cast<pi_context>(CLContext), &HFunc);
240 clSetKernelExecInfo(cast<cl_kernel>(kernel),
241 CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
245 getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
246 cast<pi_context>(CLContext), &DFunc);
248 clSetKernelExecInfo(cast<cl_kernel>(kernel),
249 CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
253 getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
254 cast<pi_context>(CLContext), &SFunc);
256 clSetKernelExecInfo(cast<cl_kernel>(kernel),
257 CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
266 size_t paramValueSize,
void *paramValue,
267 size_t *paramValueSizeRet) {
285 return PI_ERROR_INVALID_VALUE;
290 cl_device_id deviceID = cast<cl_device_id>(device);
292 if (ret_err != CL_SUCCESS) {
293 return cast<pi_result>(ret_err);
302 cl_device_atomic_capabilities cl_capabilities = 0;
303 cl_int ret_err = clGetDeviceInfo(
304 deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
305 sizeof(cl_device_atomic_capabilities), &cl_capabilities,
nullptr);
306 if (ret_err != CL_SUCCESS)
307 return cast<pi_result>(ret_err);
310 cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
311 CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
312 CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
313 cl_capabilities &= mask;
318 if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
321 if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
334 return static_cast<pi_result>(CL_INVALID_VALUE);
336 std::memcpy(paramValue, &capabilities,
sizeof(capabilities));
339 if (paramValueSizeRet)
340 *paramValueSizeRet =
sizeof(capabilities);
342 return static_cast<pi_result>(CL_SUCCESS);
356 cl_device_id deviceID = cast<cl_device_id>(device);
358 if (ret_err != CL_SUCCESS)
361 cl_device_atomic_capabilities devCapabilities = 0;
363 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
364 sizeof(cl_device_atomic_capabilities),
365 &devCapabilities,
nullptr);
366 if (ret_err != CL_SUCCESS)
368 assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
369 "Violates minimum mandated guarantee");
376 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
380 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
396 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
397 return PI_ERROR_INVALID_VALUE;
401 if (paramValueSizeRet)
402 *paramValueSizeRet =
sizeof(result);
414 cl_device_id deviceID = cast<cl_device_id>(device);
416 if (ret_err != CL_SUCCESS)
419 cl_device_atomic_capabilities devCapabilities = 0;
421 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
422 sizeof(cl_device_atomic_capabilities),
423 &devCapabilities,
nullptr);
424 if (ret_err != CL_SUCCESS)
426 assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) &&
427 "Violates minimum mandated guarantee");
428 assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) &&
429 "Violates minimum mandated guarantee");
433 if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
449 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
450 return PI_ERROR_INVALID_VALUE;
454 if (paramValueSizeRet)
455 *paramValueSizeRet =
sizeof(result);
470 cl_device_id deviceID = cast<cl_device_id>(device);
472 if (ret_err != CL_SUCCESS)
475 cl_device_atomic_capabilities devCapabilities = 0;
477 ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
478 sizeof(cl_device_atomic_capabilities),
479 &devCapabilities,
nullptr);
480 if (ret_err != CL_SUCCESS)
482 assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
483 "Violates minimum mandated guarantee");
490 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
494 if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
510 if (paramValueSize <
sizeof(cl_device_atomic_capabilities))
511 return PI_ERROR_INVALID_VALUE;
515 if (paramValueSizeRet)
516 *paramValueSizeRet =
sizeof(result);
520 cl_int ret_err = CL_SUCCESS;
522 bool supported =
false;
525 cast<cl_device_id>(device),
526 {
"cl_khr_int64_base_atomics",
"cl_khr_int64_extended_atomics"},
528 if (ret_err != CL_SUCCESS)
547 cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
548 cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
549 sizeof(cl_device_type), &devType,
nullptr);
553 cl_bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
562 if (paramValueSizeRet)
563 *paramValueSizeRet = paramValueSize;
565 size_t *out = cast<size_t *>(paramValue);
566 if (paramValueSize >=
sizeof(
size_t))
568 if (paramValueSize >= 2 *
sizeof(
size_t))
570 if (paramValueSize >= 3 *
sizeof(
size_t))
584 if (err != CL_SUCCESS)
588 err = clGetDeviceInfo(cast<cl_device_id>(device),
589 cast<cl_device_info>(paramName), paramValueSize,
590 paramValue, paramValueSizeRet);
591 if (err != CL_SUCCESS)
594 if (paramValue && *
static_cast<cl_uint *
>(paramValue) == 0u) {
607 if (paramValue && paramValueSize <
sizeof(
cl_uint))
608 return static_cast<pi_result>(CL_INVALID_VALUE);
609 if (paramValueSizeRet)
610 *paramValueSizeRet =
sizeof(
cl_uint);
617 return static_cast<pi_result>(CL_SUCCESS);
620 cl_int result = clGetDeviceInfo(
621 cast<cl_device_id>(device), cast<cl_device_info>(paramName),
622 paramValueSize, paramValue, paramValueSizeRet);
629 cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
630 cast<cl_platform_id *>(platforms),
631 cast<cl_uint *>(num_platforms));
634 if (result == CL_PLATFORM_NOT_FOUND_KHR) {
635 assert(num_platforms != 0);
645 assert(nativeHandle);
646 *platform =
reinterpret_cast<pi_platform>(nativeHandle);
653 cl_int result = clGetDeviceIDs(
654 cast<cl_platform_id>(platform), cast<cl_device_type>(
device_type),
655 cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
656 cast<cl_uint *>(num_devices));
659 if (result == CL_DEVICE_NOT_FOUND) {
660 assert(num_devices != 0);
664 return cast<pi_result>(result);
685 const char *image_target =
nullptr;
690 clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
692 if (ret_err != CL_SUCCESS) {
693 *selected_image_ind = invalid_ind;
694 return cast<pi_result>(ret_err);
704 case CL_DEVICE_TYPE_CPU:
707 case CL_DEVICE_TYPE_GPU:
710 case CL_DEVICE_TYPE_ACCELERATOR:
722 for (
pi_uint32 i = 0; i < num_images; ++i) {
723 if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
724 *selected_image_ind = i;
727 if (strcmp(images[i]->DeviceTargetSpec,
732 if ((*selected_image_ind = fallback) != invalid_ind)
735 return PI_ERROR_INVALID_BINARY;
740 assert(piDevice !=
nullptr);
741 *piDevice =
reinterpret_cast<pi_device>(nativeHandle);
751 return PI_ERROR_INVALID_VALUE;
754 assert(Properties[2] == 0);
755 if (Properties[2] != 0)
756 return PI_ERROR_INVALID_VALUE;
761 assert(queue &&
"piQueueCreate failed, queue argument is null");
763 cl_platform_id curPlatform;
765 clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_PLATFORM,
766 sizeof(cl_platform_id), &curPlatform,
nullptr);
771 assert(!(properties &
778 cl_command_queue_properties SupportByOpenCL =
779 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE |
780 CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT;
788 *queue = cast<pi_queue>(clCreateCommandQueue(
789 cast<cl_context>(context), cast<cl_device_id>(device),
790 cast<cl_command_queue_properties>(properties) & SupportByOpenCL,
792 return cast<pi_result>(ret_err);
795 cl_queue_properties CreationFlagProperties[] = {
797 cast<cl_command_queue_properties>(properties) & SupportByOpenCL, 0};
798 *queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
799 cast<cl_context>(context), cast<cl_device_id>(device),
800 CreationFlagProperties, &ret_err));
801 return cast<pi_result>(ret_err);
805 size_t param_value_size,
void *param_value,
806 size_t *param_value_size_ret) {
807 if (queue ==
nullptr) {
808 return PI_ERROR_INVALID_QUEUE;
811 switch (param_name) {
814 return PI_ERROR_INVALID_VALUE;
816 cl_int CLErr = clGetCommandQueueInfo(
817 cast<cl_command_queue>(queue), cast<cl_command_queue_info>(param_name),
818 param_value_size, param_value, param_value_size_ret);
819 if (CLErr != CL_SUCCESS) {
820 return cast<pi_result>(CLErr);
828 bool ownNativeHandle,
830 (void)ownNativeHandle;
831 assert(piQueue !=
nullptr);
832 *piQueue =
reinterpret_cast<pi_queue>(nativeHandle);
833 clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
841 clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
842 sizeof(
cl_uint), &deviceCount,
nullptr);
844 std::vector<cl_device_id> devicesInCtx(deviceCount);
846 if (ret_err != CL_SUCCESS || deviceCount < 1) {
847 if (res_program !=
nullptr)
848 *res_program =
nullptr;
849 return cast<pi_result>(CL_INVALID_CONTEXT);
852 ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
853 deviceCount *
sizeof(cl_device_id),
854 devicesInCtx.data(),
nullptr);
858 cl_platform_id curPlatform;
859 ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
860 sizeof(cl_platform_id), &curPlatform,
nullptr);
873 for (cl_device_id dev : devicesInCtx) {
883 bool supported =
false;
889 return cast<pi_result>(CL_INVALID_OPERATION);
892 if (res_program !=
nullptr)
893 *res_program = cast<pi_program>(clCreateProgramWithIL(
894 cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
901 for (cl_device_id dev : devicesInCtx) {
902 bool supported =
false;
908 return cast<pi_result>(CL_INVALID_OPERATION);
912 cl_program(CL_API_CALL *)(cl_context,
const void *, size_t,
cl_int *);
914 reinterpret_cast<apiFuncT
>(clGetExtensionFunctionAddressForPlatform(
915 curPlatform,
"clCreateProgramWithILKHR"));
917 assert(funcPtr !=
nullptr);
918 if (res_program !=
nullptr)
919 *res_program = cast<pi_program>(
920 funcPtr(cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
922 err = PI_ERROR_INVALID_VALUE;
930 assert(piProgram !=
nullptr);
931 *piProgram =
reinterpret_cast<pi_program>(nativeHandle);
945 for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
948 normalizedCoords =
static_cast<pi_bool>(sampler_properties[++i]);
955 assert(
false &&
"Cannot recognize sampler property");
960 *result_sampler = cast<pi_sampler>(
961 clCreateSampler(cast<cl_context>(context), normalizedCoords,
962 addressingMode, filterMode, cast<cl_int *>(&error_code)));
967 const pi_mem *arg_value) {
968 return cast<pi_result>(
969 clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
970 sizeof(arg_value), cast<const cl_mem *>(arg_value)));
975 return cast<pi_result>(
976 clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
977 sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
983 assert(piKernel !=
nullptr);
984 *piKernel =
reinterpret_cast<pi_kernel>(nativeHandle);
993 const std::string &sub_str) {
996 for (
const auto &x : str) {
997 if (x == delimiter) {
998 if (str.substr(beg, length) == sub_str)
1008 if (str.substr(beg, length) == sub_str)
1015 cl_device_id device, cl_program program,
const char *FuncName,
1018 const char *func_name,
1021 cl_context CLContext =
nullptr;
1023 clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
1024 sizeof(CLContext), &CLContext,
nullptr);
1026 if (ret_err != CL_SUCCESS)
1027 return cast<pi_result>(ret_err);
1032 cast<pi_context>(CLContext), &FuncT);
1041 *function_pointer_ret = 0;
1046 if (Res != CL_SUCCESS)
1047 return cast<pi_result>(Res);
1049 std::string ClResult(Size,
' ');
1052 ClResult.size(), &ClResult[0],
nullptr);
1053 if (Res != CL_SUCCESS)
1054 return cast<pi_result>(Res);
1059 ClResult.pop_back();
1061 return PI_ERROR_INVALID_KERNEL_NAME;
1063 pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1067 pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
1068 cast<cl_program>(program), func_name,
1069 function_pointer_ret));
1073 if (pi_ret_err == CL_INVALID_ARG_VALUE) {
1074 *function_pointer_ret = 0;
1075 return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1084 const void *private_info,
1085 size_t cb,
void *user_data1),
1087 pi_result ret = PI_ERROR_INVALID_OPERATION;
1088 *retcontext = cast<pi_context>(
1089 clCreateContext(properties, cast<cl_uint>(num_devices),
1090 cast<const cl_device_id *>(devices),
pfn_notify,
1091 user_data, cast<cl_int *>(&ret)));
1099 bool ownNativeHandle,
1103 (void)ownNativeHandle;
1104 assert(piContext !=
nullptr);
1105 assert(ownNativeHandle ==
false);
1106 *piContext =
reinterpret_cast<pi_context>(nativeHandle);
1111 size_t paramValueSize,
void *paramValue,
1112 size_t *paramValueSizeRet) {
1113 switch (paramName) {
1129 PI_ERROR_INVALID_ARG_VALUE);
1130 return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
1133 cl_int result = clGetContextInfo(
1134 cast<cl_context>(context), cast<cl_context_info>(paramName),
1135 paramValueSize, paramValue, paramValueSizeRet);
1143 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1147 clCreateBufferWithPropertiesINTEL_fn FuncPtr =
nullptr;
1150 clCreateBufferWithPropertiesINTEL_fn>(
1153 *ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
1154 cast<cl_mem_flags>(flags), size,
host_ptr,
1155 cast<cl_int *>(&ret_err)));
1160 *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
1161 cast<cl_mem_flags>(flags), size,
1162 host_ptr, cast<cl_int *>(&ret_err)));
1170 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1171 *ret_mem = cast<pi_mem>(
1172 clCreateImage(cast<cl_context>(context), cast<cl_mem_flags>(flags),
1173 cast<const cl_image_format *>(image_format),
1174 cast<const cl_image_desc *>(image_desc),
host_ptr,
1175 cast<cl_int *>(&ret_err)));
1182 void *buffer_create_info,
pi_mem *ret_mem) {
1184 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1185 *ret_mem = cast<pi_mem>(
1186 clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
1187 cast<cl_buffer_create_type>(buffer_create_type),
1188 buffer_create_info, cast<cl_int *>(&ret_err)));
1194 bool ownNativeHandle,
pi_mem *piMem) {
1196 (void)ownNativeHandle;
1197 assert(piMem !=
nullptr);
1198 *piMem =
reinterpret_cast<pi_mem>(nativeHandle);
1203 const char **strings,
1204 const size_t *lengths,
1207 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1208 *ret_program = cast<pi_program>(
1209 clCreateProgramWithSource(cast<cl_context>(context), cast<cl_uint>(count),
1210 strings, lengths, cast<cl_int *>(&ret_err)));
1216 const size_t *lengths,
const unsigned char **binaries,
1220 (void)num_metadata_entries;
1222 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1223 *ret_program = cast<pi_program>(clCreateProgramWithBinary(
1224 cast<cl_context>(context), cast<cl_uint>(num_devices),
1225 cast<const cl_device_id *>(device_list), lengths, binaries,
1226 cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
1231 const pi_device *device_list,
const char *options,
1237 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1238 *ret_program = cast<pi_program>(
1239 clLinkProgram(cast<cl_context>(context), cast<cl_uint>(num_devices),
1240 cast<const cl_device_id *>(device_list), options,
1241 cast<cl_uint>(num_input_programs),
1242 cast<const cl_program *>(input_programs),
1244 cast<cl_int *>(&ret_err)));
1251 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1252 *ret_kernel = cast<pi_kernel>(clCreateKernel(
1253 cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
1259 size_t param_value_size,
void *param_value,
1260 size_t *param_value_size_ret) {
1261 if (kernel ==
nullptr) {
1262 return PI_ERROR_INVALID_KERNEL;
1265 switch (param_name) {
1267 return PI_ERROR_INVALID_VALUE;
1269 cl_int result = clGetKernelWorkGroupInfo(
1270 cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1271 cast<cl_kernel_work_group_info>(param_name), param_value_size,
1272 param_value, param_value_size_ret);
1279 size_t input_value_size,
1280 const void *input_value,
1281 size_t param_value_size,
void *param_value,
1282 size_t *param_value_size_ret) {
1283 (void)param_value_size;
1287 std::shared_ptr<void> implicit_input_value;
1296 if (pi_ret_err != PI_SUCCESS)
1298 std::shared_ptr<size_t[]> WGSizes{
new size_t[max_dims]};
1301 max_dims *
sizeof(
size_t), WGSizes.get(),
nullptr);
1302 if (pi_ret_err != PI_SUCCESS)
1304 for (
size_t i = 1; i < max_dims; ++i)
1305 WGSizes.get()[i] = 1;
1306 implicit_input_value = std::move(WGSizes);
1307 input_value_size = max_dims *
sizeof(size_t);
1308 input_value = implicit_input_value.get();
1311 ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
1312 cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1313 cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
1314 sizeof(
size_t), &ret_val, param_value_size_ret));
1316 if (ret_err != CL_SUCCESS)
1317 return cast<pi_result>(ret_err);
1319 *(
static_cast<uint32_t *
>(param_value)) =
static_cast<uint32_t
>(ret_val);
1320 if (param_value_size_ret)
1321 *param_value_size_ret =
sizeof(uint32_t);
1327 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1328 auto *cl_err = cast<cl_int *>(&ret_err);
1330 cl_event e = clCreateUserEvent(cast<cl_context>(context), cl_err);
1331 *ret_event = cast<pi_event>(e);
1332 if (*cl_err != CL_SUCCESS)
1334 *cl_err = clSetUserEventStatus(e, CL_COMPLETE);
1340 bool ownNativeHandle,
1344 (void)ownNativeHandle;
1346 assert(piEvent !=
nullptr);
1347 assert(nativeHandle);
1350 *piEvent =
reinterpret_cast<pi_event>(nativeHandle);
1356 size_t offset,
size_t size,
1361 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1362 *ret_map = cast<void *>(clEnqueueMapBuffer(
1363 cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
1364 cast<cl_bool>(blocking_map), map_flags, offset, size,
1365 cast<cl_uint>(num_events_in_wait_list),
1366 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event),
1367 cast<cl_int *>(&ret_err)));
1386 void *Ptr =
nullptr;
1387 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1390 clHostMemAllocINTEL_fn FuncPtr =
nullptr;
1391 RetVal = getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
1395 Ptr = FuncPtr(cast<cl_context>(context),
1396 cast<cl_mem_properties_intel *>(properties), size,
alignment,
1397 cast<cl_int *>(&RetVal));
1403 if (RetVal == PI_SUCCESS &&
alignment != 0)
1404 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0 &&
1405 "allocation not aligned correctly");
1423 void *Ptr =
nullptr;
1424 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1427 clDeviceMemAllocINTEL_fn FuncPtr =
nullptr;
1429 getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
1433 Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1434 cast<cl_mem_properties_intel *>(properties), size,
alignment,
1435 cast<cl_int *>(&RetVal));
1441 if (RetVal == PI_SUCCESS &&
alignment != 0)
1442 assert(
reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0 &&
1443 "allocation not aligned correctly");
1461 void *Ptr =
nullptr;
1462 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1465 clSharedMemAllocINTEL_fn FuncPtr =
nullptr;
1467 getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
1471 Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1472 cast<cl_mem_properties_intel *>(properties), size,
alignment,
1473 cast<cl_int *>(&RetVal));
1479 (RetVal == PI_SUCCESS &&
1480 reinterpret_cast<std::uintptr_t
>(*result_ptr) %
alignment == 0));
1491 clMemBlockingFreeINTEL_fn FuncPtr =
nullptr;
1493 pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1495 getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
1499 RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr));
1513 size_t arg_size,
const void *arg_value) {
1519 cl_context CLContext;
1520 cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
1521 sizeof(cl_context), &CLContext,
nullptr);
1522 if (CLErr != CL_SUCCESS) {
1523 return cast<pi_result>(CLErr);
1526 clSetKernelArgMemPointerINTEL_fn FuncPtr =
nullptr;
1528 clSetKernelArgMemPointerINTEL_fn>(
1529 cast<pi_context>(CLContext), &FuncPtr);
1534 auto PtrToPtr =
reinterpret_cast<const intptr_t *
>(arg_value);
1535 auto DerefPtr =
reinterpret_cast<void *
>(*PtrToPtr);
1537 cast<pi_result>(FuncPtr(cast<cl_kernel>(kernel), arg_index, DerefPtr));
1554 size_t count,
pi_uint32 num_events_in_waitlist,
1559 cl_context CLContext;
1561 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1562 sizeof(cl_context), &CLContext,
nullptr);
1563 if (CLErr != CL_SUCCESS) {
1564 return cast<pi_result>(CLErr);
1567 clEnqueueMemsetINTEL_fn FuncPtr =
nullptr;
1569 getExtFuncFromContext<clEnqueueMemsetName, clEnqueueMemsetINTEL_fn>(
1570 cast<pi_context>(CLContext), &FuncPtr);
1573 RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue), ptr, value,
1574 count, num_events_in_waitlist,
1575 cast<const cl_event *>(events_waitlist),
1576 cast<cl_event *>(event)));
1593 const void *src_ptr,
size_t size,
1599 cl_context CLContext;
1601 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1602 sizeof(cl_context), &CLContext,
nullptr);
1603 if (CLErr != CL_SUCCESS) {
1604 return cast<pi_result>(CLErr);
1607 clEnqueueMemcpyINTEL_fn FuncPtr =
nullptr;
1609 getExtFuncFromContext<clEnqueueMemcpyName, clEnqueueMemcpyINTEL_fn>(
1610 cast<pi_context>(CLContext), &FuncPtr);
1613 RetVal = cast<pi_result>(
1614 FuncPtr(cast<cl_command_queue>(queue), blocking, dst_ptr, src_ptr, size,
1615 num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1616 cast<cl_event *>(event)));
1641 return PI_ERROR_INVALID_VALUE;
1643 return cast<pi_result>(clEnqueueMarkerWithWaitList(
1644 cast<cl_command_queue>(queue), num_events_in_waitlist,
1645 cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
1688 return cast<pi_result>(
1689 clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0,
nullptr,
1690 reinterpret_cast<cl_event *
>(event)));
1733 size_t pitch,
size_t pattern_size,
1734 const void *pattern,
size_t width,
1739 std::ignore = queue;
1741 std::ignore = pitch;
1742 std::ignore = pattern_size;
1743 std::ignore = pattern;
1744 std::ignore = width;
1745 std::ignore = height;
1746 std::ignore = num_events_in_waitlist;
1747 std::ignore = events_waitlist;
1748 std::ignore = event;
1749 return PI_ERROR_INVALID_OPERATION;
1764 pi_queue queue,
void *ptr,
size_t pitch,
int value,
size_t width,
1765 size_t height,
pi_uint32 num_events_in_waitlist,
1767 std::ignore = queue;
1769 std::ignore = pitch;
1770 std::ignore = value;
1771 std::ignore = width;
1772 std::ignore = height;
1773 std::ignore = num_events_in_waitlist;
1774 std::ignore = events_waitlist;
1775 std::ignore = event;
1776 return PI_ERROR_INVALID_OPERATION;
1795 const void *src_ptr,
size_t src_pitch,
size_t width,
size_t height,
1798 std::ignore = queue;
1799 std::ignore = blocking;
1800 std::ignore = dst_ptr;
1801 std::ignore = dst_pitch;
1802 std::ignore = src_ptr;
1803 std::ignore = src_pitch;
1804 std::ignore = width;
1805 std::ignore = height;
1806 std::ignore = num_events_in_waitlist;
1807 std::ignore = events_waitlist;
1808 std::ignore = event;
1809 return PI_ERROR_INVALID_OPERATION;
1830 size_t param_value_size,
void *param_value,
1831 size_t *param_value_size_ret) {
1833 clGetMemAllocInfoINTEL_fn FuncPtr =
nullptr;
1835 getExtFuncFromContext<clGetMemAllocInfoName, clGetMemAllocInfoINTEL_fn>(
1839 RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr, param_name,
1840 param_value_size, param_value,
1841 param_value_size_ret));
1848 cl_command_queue, cl_program,
const char *,
cl_bool, size_t, size_t,
1849 const void *,
cl_uint,
const cl_event *, cl_event *);
1852 cl_command_queue, cl_program,
const char *,
cl_bool, size_t, size_t,
void *,
1853 cl_uint,
const cl_event *, cl_event *);
1870 pi_bool blocking_write,
size_t count,
size_t offset,
const void *src,
1873 cl_context Ctx =
nullptr;
1875 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1876 sizeof(Ctx), &Ctx,
nullptr);
1878 if (Res != CL_SUCCESS)
1879 return cast<pi_result>(Res);
1882 Res = getExtFuncFromContext<clEnqueueWriteGlobalVariableName, decltype(F)>(
1883 cast<pi_context>(Ctx), &F);
1885 if (!F || Res != CL_SUCCESS)
1886 return PI_ERROR_INVALID_OPERATION;
1887 Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
1888 blocking_write, count, offset, src, num_events_in_wait_list,
1889 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
1890 return cast<pi_result>(Res);
1908 size_t count,
size_t offset,
void *dst,
pi_uint32 num_events_in_wait_list,
1910 cl_context Ctx =
nullptr;
1912 clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1913 sizeof(Ctx), &Ctx,
nullptr);
1915 if (Res != CL_SUCCESS)
1916 return cast<pi_result>(Res);
1919 Res = getExtFuncFromContext<clEnqueueReadGlobalVariableName, decltype(F)>(
1920 cast<pi_context>(Ctx), &F);
1922 if (!F || Res != CL_SUCCESS)
1923 return PI_ERROR_INVALID_OPERATION;
1924 Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
1925 blocking_read, count, offset, dst, num_events_in_wait_list,
1926 cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
1927 return cast<pi_result>(Res);
1942 size_t param_value_size,
1943 const void *param_value) {
1948 return cast<pi_result>(clSetKernelExecInfo(
1949 cast<cl_kernel>(kernel), param_name, param_value_size, param_value));
1954 cl_program program,
cl_uint spec_id,
size_t spec_size,
1955 const void *spec_value);
1960 const void *spec_value) {
1961 cl_program ClProg = cast<cl_program>(prog);
1962 cl_context Ctx =
nullptr;
1965 clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT,
sizeof(Ctx), &Ctx, &RetSize);
1967 if (Res != CL_SUCCESS)
1968 return cast<pi_result>(Res);
1972 decltype(F)>(cast<pi_context>(Ctx), &F);
1974 if (!F || Res != CL_SUCCESS)
1975 return PI_ERROR_INVALID_OPERATION;
1976 Res = F(ClProg, spec_id, spec_size, spec_value);
1977 return cast<pi_result>(Res);
1988 assert(nativeHandle !=
nullptr);
2035 (void)PluginParameter;
2040 uint64_t *HostTime) {
2042 cl_platform_id platform;
2043 cl_device_id deviceID = cast<cl_device_id>(Device);
2046 auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
2047 sizeof(cl_platform_id), &platform,
nullptr);
2048 if (ret_err != CL_SUCCESS) {
2049 return cast<pi_result>(ret_err);
2054 if (ret_err != CL_SUCCESS) {
2055 return cast<pi_result>(ret_err);
2062 "OpenCL version for device and/or platform is less than 2.1",
2063 PI_ERROR_INVALID_OPERATION);
2064 return PI_ERROR_INVALID_OPERATION;
2069 clGetDeviceAndHostTimer(deviceID, DeviceTime,
2070 HostTime ==
nullptr ? &dummy : HostTime);
2072 }
else if (HostTime) {
2073 clGetHostTimer(deviceID, HostTime);
2086 size_t PluginVersionSize =
sizeof(PluginInit->
PluginVersion);
2088 return PI_ERROR_INVALID_VALUE;
// Assigns one entry of the plugin function table: stores the address of
// `ocl_api` into the `pi_api` member of PluginInit->PiFunctionTable, converting
// it with a C-style cast to the pointer type of the global `::pi_api`
// declaration. The expansion already ends with ';'.
2091 #define _PI_CL(pi_api, ocl_api) \
2092 (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
// The DLL name macro is defined only for the duration of the include below and
// removed again right after it.
// NOTE(review): presumably common_win_pi_trace.hpp reads
// __SYCL_PLUGIN_DLL_NAME to identify this plugin — confirm against that header.
2229 #define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll"
2230 #include "../common_win_pi_trace/common_win_pi_trace.hpp"
2231 #undef __SYCL_PLUGIN_DLL_NAME