DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_opencl.cpp
Go to the documentation of this file.
1 //==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
10 
16 
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
18 
19 #include <CL/sycl/detail/cl.h>
20 #include <CL/sycl/detail/pi.h>
21 
22 #include <algorithm>
23 #include <cassert>
24 #include <cstring>
25 #include <iostream>
26 #include <limits>
27 #include <map>
28 #include <sstream>
29 #include <string>
30 #include <vector>
31 
32 #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \
33  if (err != CL_SUCCESS) { \
34  if (ptr != nullptr) \
35  *ptr = nullptr; \
36  return cast<pi_result>(reterr); \
37  }
38 
40 
41 // Want all the needed casts be explicit, do not define conversion operators.
42 template <class To, class From> To cast(From value) {
43  // TODO: see if more sanity checks are possible.
44  static_assert(sizeof(From) == sizeof(To), "cast failed size check");
45  return (To)(value);
46 }
47 
48 // Older versions of GCC don't like "const" here
49 #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2))
50 #define CONSTFIX constexpr
51 #else
52 #define CONSTFIX const
53 #endif
54 
55 // Names of USM functions that are queried from OpenCL
56 CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
57 CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
58 CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
59 CONSTFIX char clMemFreeName[] = "clMemFreeINTEL";
60 CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL";
62  "clCreateBufferWithPropertiesINTEL";
63 CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL";
64 CONSTFIX char clEnqueueMemsetName[] = "clEnqueueMemsetINTEL";
65 CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL";
66 CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL";
68  "clSetProgramSpecializationConstant";
70  "clGetDeviceFunctionPointerINTEL";
71 
72 #undef CONSTFIX
73 
74 // Global variables for PI_PLUGIN_SPECIFIC_ERROR
75 constexpr size_t MaxMessageSize = 256;
77 thread_local char ErrorMessage[MaxMessageSize];
78 
79 // Utility function for setting a message and warning
80 [[maybe_unused]] static void setErrorMessage(const char *message,
81  pi_result error_code) {
82  assert(strlen(message) <= MaxMessageSize);
83  strcpy(ErrorMessage, message);
84  ErrorMessageCode = error_code;
85 }
86 
87 // Returns plugin specific error and warning messages
89  *message = &ErrorMessage[0];
90  return ErrorMessageCode;
91 }
92 
93 // USM helper function to get an extension function pointer
94 template <const char *FuncName, typename T>
96  // TODO
97  // Potentially redo caching as PI interface changes.
98  thread_local static std::map<pi_context, T> FuncPtrs;
99 
100  // if cached, return cached FuncPtr
101  if (auto F = FuncPtrs[context]) {
102  // if cached that extension is not available return nullptr and
103  // PI_INVALID_VALUE
104  *fptr = F;
105  return F ? PI_SUCCESS : PI_INVALID_VALUE;
106  }
107 
108  cl_uint deviceCount;
109  cl_int ret_err =
110  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
111  sizeof(cl_uint), &deviceCount, nullptr);
112 
113  if (ret_err != CL_SUCCESS || deviceCount < 1) {
114  return PI_INVALID_CONTEXT;
115  }
116 
117  std::vector<cl_device_id> devicesInCtx(deviceCount);
118  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
119  deviceCount * sizeof(cl_device_id),
120  devicesInCtx.data(), nullptr);
121 
122  if (ret_err != CL_SUCCESS) {
123  return PI_INVALID_CONTEXT;
124  }
125 
126  cl_platform_id curPlatform;
127  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
128  sizeof(cl_platform_id), &curPlatform, nullptr);
129 
130  if (ret_err != CL_SUCCESS) {
131  return PI_INVALID_CONTEXT;
132  }
133 
134  T FuncPtr =
135  (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
136 
137  if (!FuncPtr) {
138  // Cache that the extension is not available
139  FuncPtrs[context] = nullptr;
140  return PI_INVALID_VALUE;
141  }
142 
143  *fptr = FuncPtr;
144  FuncPtrs[context] = FuncPtr;
145 
146  return cast<pi_result>(ret_err);
147 }
148 
154  // We test that each alloc type is supported before we actually try to
155  // set KernelExecInfo.
156  cl_bool TrueVal = CL_TRUE;
157  clHostMemAllocINTEL_fn HFunc = nullptr;
158  clSharedMemAllocINTEL_fn SFunc = nullptr;
159  clDeviceMemAllocINTEL_fn DFunc = nullptr;
160  cl_context CLContext;
161  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
162  sizeof(cl_context), &CLContext, nullptr);
163  if (CLErr != CL_SUCCESS) {
164  return cast<pi_result>(CLErr);
165  }
166 
167  getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
168  cast<pi_context>(CLContext), &HFunc);
169  if (HFunc) {
170  clSetKernelExecInfo(cast<cl_kernel>(kernel),
171  CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
172  sizeof(cl_bool), &TrueVal);
173  }
174 
175  getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
176  cast<pi_context>(CLContext), &DFunc);
177  if (DFunc) {
178  clSetKernelExecInfo(cast<cl_kernel>(kernel),
179  CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
180  sizeof(cl_bool), &TrueVal);
181  }
182 
183  getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
184  cast<pi_context>(CLContext), &SFunc);
185  if (SFunc) {
186  clSetKernelExecInfo(cast<cl_kernel>(kernel),
187  CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
188  sizeof(cl_bool), &TrueVal);
189  }
190  return PI_SUCCESS;
191 }
192 
193 extern "C" {
194 
196  size_t paramValueSize, void *paramValue,
197  size_t *paramValueSizeRet) {
198  switch (paramName) {
199  // TODO: Check regularly to see if support in enabled in OpenCL.
200  // Intel GPU EU device-specific information extensions.
201  // Some of the queries are enabled by cl_intel_device_attribute_query
202  // extension, but it's not yet in the Registry.
211  // TODO: Check if device UUID extension is enabled in OpenCL.
212  // For details about Intel UUID extension, see
213  // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
214  case PI_DEVICE_INFO_UUID:
217  return PI_INVALID_VALUE;
219  size_t extSize;
220  cl_bool result = clGetDeviceInfo(
221  cast<cl_device_id>(device), CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize);
222  std::string extStr(extSize, '\0');
223  result = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_EXTENSIONS,
224  extSize, &extStr.front(), nullptr);
225  if (extStr.find("cl_khr_int64_base_atomics") == std::string::npos ||
226  extStr.find("cl_khr_int64_extended_atomics") == std::string::npos)
227  result = false;
228  else
229  result = true;
230  std::memcpy(paramValue, &result, sizeof(cl_bool));
231  return PI_SUCCESS;
232  }
234  cl_bool result = true;
235  std::memcpy(paramValue, &result, sizeof(cl_bool));
236  return PI_SUCCESS;
237  }
239  cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
240  cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
241  sizeof(cl_device_type), &devType, nullptr);
242 
243  // FIXME: here we assume that program built for a root GPU device can be
244  // used on its sub-devices without re-building
245  cl_bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
246  std::memcpy(paramValue, &result, sizeof(cl_bool));
247  return PI_SUCCESS;
248  }
250  // Returns the maximum sizes of a work group for each dimension one
251  // could use to submit a kernel. There is no such query defined in OpenCL
252  // so we'll return the maximum value.
253  {
254  if (paramValueSizeRet)
255  *paramValueSizeRet = paramValueSize;
256  static constexpr size_t Max = (std::numeric_limits<size_t>::max)();
257  size_t *out = cast<size_t *>(paramValue);
258  if (paramValueSize >= sizeof(size_t))
259  out[0] = Max;
260  if (paramValueSize >= 2 * sizeof(size_t))
261  out[1] = Max;
262  if (paramValueSize >= 3 * sizeof(size_t))
263  out[2] = Max;
264  return PI_SUCCESS;
265  }
266 
267  default:
268  cl_int result = clGetDeviceInfo(
269  cast<cl_device_id>(device), cast<cl_device_info>(paramName),
270  paramValueSize, paramValue, paramValueSizeRet);
271  return static_cast<pi_result>(result);
272  }
273 }
274 
276  pi_uint32 *num_platforms) {
277  cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
278  cast<cl_platform_id *>(platforms),
279  cast<cl_uint *>(num_platforms));
280 
281  // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms
282  if (result == CL_PLATFORM_NOT_FOUND_KHR) {
283  assert(num_platforms != 0);
284  *num_platforms = 0;
285  result = PI_SUCCESS;
286  }
287  return static_cast<pi_result>(result);
288 }
289 
292  assert(platform);
293  assert(nativeHandle);
294  *platform = reinterpret_cast<pi_platform>(nativeHandle);
295  return PI_SUCCESS;
296 }
297 
299  pi_uint32 num_entries, pi_device *devices,
300  pi_uint32 *num_devices) {
301  cl_int result = clGetDeviceIDs(
302  cast<cl_platform_id>(platform), cast<cl_device_type>(device_type),
303  cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
304  cast<cl_uint *>(num_devices));
305 
306  // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices
307  if (result == CL_DEVICE_NOT_FOUND) {
308  assert(num_devices != 0);
309  *num_devices = 0;
310  result = PI_SUCCESS;
311  }
312  return cast<pi_result>(result);
313 }
314 
316  pi_uint32 num_images,
317  pi_uint32 *selected_image_ind) {
318 
319  // TODO: this is a bare-bones implementation for choosing a device image
320  // that would be compatible with the targeted device. An AOT-compiled
321  // image is preferred over SPIR-V for known devices (i.e. Intel devices)
322  // The implementation makes no effort to differentiate between multiple images
323  // for the given device, and simply picks the first one compatible
324  // Real implementation will use the same mechanism OpenCL ICD dispatcher
325  // uses. Something like:
326  // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT);
327  // return context->dispatch->piextDeviceSelectIR(
328  // ctx, images, num_images, selected_image);
329  // where context->dispatch is set to the dispatch table provided by PI
330  // plugin for platform/device the ctx was created for.
331 
332  // Choose the binary target for the provided device
333  const char *image_target = nullptr;
334  // Get the type of the device
335  cl_device_type device_type;
336  constexpr pi_uint32 invalid_ind = std::numeric_limits<pi_uint32>::max();
337  cl_int ret_err =
338  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
339  sizeof(cl_device_type), &device_type, nullptr);
340  if (ret_err != CL_SUCCESS) {
341  *selected_image_ind = invalid_ind;
342  return cast<pi_result>(ret_err);
343  }
344 
345  switch (device_type) {
346  // TODO: Factor out vendor specifics into a separate source
347  // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp?
348 
349  // We'll attempt to find an image that was AOT-compiled
350  // from a SPIR-V image into an image specific for:
351 
352  case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU
354  break;
355  case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU
357  break;
358  case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA
360  break;
361  default:
362  // Otherwise, we'll attempt to find and JIT-compile
363  // a device-independent SPIR-V image
365  break;
366  }
367 
368  // Find the appropriate device image, fallback to spirv if not found
369  pi_uint32 fallback = invalid_ind;
370  for (pi_uint32 i = 0; i < num_images; ++i) {
371  if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
372  *selected_image_ind = i;
373  return PI_SUCCESS;
374  }
375  if (strcmp(images[i]->DeviceTargetSpec,
377  fallback = i;
378  }
379  // Points to a spirv image, if such indeed was found
380  if ((*selected_image_ind = fallback) != invalid_ind)
381  return PI_SUCCESS;
382  // No image can be loaded for the given device
383  return PI_INVALID_BINARY;
384 }
385 
387  pi_platform, pi_device *piDevice) {
388  assert(piDevice != nullptr);
389  *piDevice = reinterpret_cast<pi_device>(nativeHandle);
390  return PI_SUCCESS;
391 }
392 
394  pi_queue_properties properties, pi_queue *queue) {
395  assert(queue && "piQueueCreate failed, queue argument is null");
396 
397  cl_platform_id curPlatform;
398  cl_int ret_err =
399  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_PLATFORM,
400  sizeof(cl_platform_id), &curPlatform, nullptr);
401 
402  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
403 
404  size_t platVerSize;
405  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, 0, nullptr,
406  &platVerSize);
407 
408  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
409 
410  std::string platVer(platVerSize, '\0');
411  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, platVerSize,
412  &platVer.front(), nullptr);
413 
414  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
415 
416  if (platVer.find("OpenCL 1.0") != std::string::npos ||
417  platVer.find("OpenCL 1.1") != std::string::npos ||
418  platVer.find("OpenCL 1.2") != std::string::npos) {
419  *queue = cast<pi_queue>(clCreateCommandQueue(
420  cast<cl_context>(context), cast<cl_device_id>(device),
421  cast<cl_command_queue_properties>(properties), &ret_err));
422  return cast<pi_result>(ret_err);
423  }
424 
425  cl_queue_properties CreationFlagProperties[] = {
426  CL_QUEUE_PROPERTIES, cast<cl_command_queue_properties>(properties), 0};
427  *queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
428  cast<cl_context>(context), cast<cl_device_id>(device),
429  CreationFlagProperties, &ret_err));
430  return cast<pi_result>(ret_err);
431 }
432 
435  bool ownNativeHandle,
436  pi_queue *piQueue) {
437  (void)ownNativeHandle;
438  assert(piQueue != nullptr);
439  *piQueue = reinterpret_cast<pi_queue>(nativeHandle);
440  clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
441  return PI_SUCCESS;
442 }
443 
445  pi_program *res_program) {
446  cl_uint deviceCount;
447  cl_int ret_err =
448  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
449  sizeof(cl_uint), &deviceCount, nullptr);
450 
451  std::vector<cl_device_id> devicesInCtx(deviceCount);
452 
453  if (ret_err != CL_SUCCESS || deviceCount < 1) {
454  if (res_program != nullptr)
455  *res_program = nullptr;
456  return cast<pi_result>(CL_INVALID_CONTEXT);
457  }
458 
459  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
460  deviceCount * sizeof(cl_device_id),
461  devicesInCtx.data(), nullptr);
462 
463  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
464 
465  cl_platform_id curPlatform;
466  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
467  sizeof(cl_platform_id), &curPlatform, nullptr);
468 
469  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
470 
471  size_t devVerSize;
472  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, 0, nullptr,
473  &devVerSize);
474  std::string devVer(devVerSize, '\0');
475  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_VERSION, devVerSize,
476  &devVer.front(), nullptr);
477 
478  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
479 
480  pi_result err = PI_SUCCESS;
481  if (devVer.find("OpenCL 1.0") == std::string::npos &&
482  devVer.find("OpenCL 1.1") == std::string::npos &&
483  devVer.find("OpenCL 1.2") == std::string::npos &&
484  devVer.find("OpenCL 2.0") == std::string::npos) {
485  if (res_program != nullptr)
486  *res_program = cast<pi_program>(clCreateProgramWithIL(
487  cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
488  return err;
489  }
490 
491  size_t extSize;
492  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_EXTENSIONS, 0, nullptr,
493  &extSize);
494  std::string extStr(extSize, '\0');
495  ret_err = clGetPlatformInfo(curPlatform, CL_PLATFORM_EXTENSIONS, extSize,
496  &extStr.front(), nullptr);
497 
498  if (ret_err != CL_SUCCESS ||
499  extStr.find("cl_khr_il_program") == std::string::npos) {
500  if (res_program != nullptr)
501  *res_program = nullptr;
502  return cast<pi_result>(CL_INVALID_CONTEXT);
503  }
504 
505  using apiFuncT =
506  cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
507  apiFuncT funcPtr =
508  reinterpret_cast<apiFuncT>(clGetExtensionFunctionAddressForPlatform(
509  curPlatform, "clCreateProgramWithILKHR"));
510 
511  assert(funcPtr != nullptr);
512  if (res_program != nullptr)
513  *res_program = cast<pi_program>(
514  funcPtr(cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
515  else
516  err = PI_INVALID_VALUE;
517 
518  return err;
519 }
520 
522  pi_context, bool,
523  pi_program *piProgram) {
524  assert(piProgram != nullptr);
525  *piProgram = reinterpret_cast<pi_program>(nativeHandle);
526  return PI_SUCCESS;
527 }
528 
530  const pi_sampler_properties *sampler_properties,
531  pi_sampler *result_sampler) {
532  // Initialize properties according to OpenCL 2.1 spec.
533  pi_result error_code;
534  pi_bool normalizedCoords = PI_TRUE;
537 
538  // Unpack sampler properties
539  for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
540  ++i) {
541  if (sampler_properties[i] == PI_SAMPLER_INFO_NORMALIZED_COORDS) {
542  normalizedCoords = static_cast<pi_bool>(sampler_properties[++i]);
543  } else if (sampler_properties[i] == PI_SAMPLER_INFO_ADDRESSING_MODE) {
544  addressingMode =
545  static_cast<pi_sampler_addressing_mode>(sampler_properties[++i]);
546  } else if (sampler_properties[i] == PI_SAMPLER_INFO_FILTER_MODE) {
547  filterMode = static_cast<pi_sampler_filter_mode>(sampler_properties[++i]);
548  } else {
549  assert(false && "Cannot recognize sampler property");
550  }
551  }
552 
553  // Always call OpenCL 1.0 API
554  *result_sampler = cast<pi_sampler>(
555  clCreateSampler(cast<cl_context>(context), normalizedCoords,
556  addressingMode, filterMode, cast<cl_int *>(&error_code)));
557  return error_code;
558 }
559 
561  const pi_mem *arg_value) {
562  return cast<pi_result>(
563  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
564  sizeof(arg_value), cast<const cl_mem *>(arg_value)));
565 }
566 
568  const pi_sampler *arg_value) {
569  return cast<pi_result>(
570  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
571  sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
572 }
573 
575  pi_context, pi_program, bool,
576  pi_kernel *piKernel) {
577  assert(piKernel != nullptr);
578  *piKernel = reinterpret_cast<pi_kernel>(nativeHandle);
579  return PI_SUCCESS;
580 }
581 
582 // Function gets characters between delimeter's in str
583 // then checks if they are equal to the sub_str.
584 // returns true if there is at least one instance
585 // returns false if there are no instances of the name
586 static bool is_in_separated_string(const std::string &str, char delimiter,
587  const std::string &sub_str) {
588  size_t beg = 0;
589  size_t length = 0;
590  for (const auto &x : str) {
591  if (x == delimiter) {
592  if (str.substr(beg, length) == sub_str)
593  return true;
594 
595  beg += length + 1;
596  length = 0;
597  continue;
598  }
599  length++;
600  }
601  if (length != 0)
602  if (str.substr(beg, length) == sub_str)
603  return true;
604 
605  return false;
606 }
607 
608 typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
609  cl_device_id device, cl_program program, const char *FuncName,
610  cl_ulong *ret_ptr);
612  const char *func_name,
613  pi_uint64 *function_pointer_ret) {
614 
615  cl_context CLContext = nullptr;
616  cl_int ret_err =
617  clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
618  sizeof(CLContext), &CLContext, nullptr);
619 
620  if (ret_err != CL_SUCCESS)
621  return cast<pi_result>(ret_err);
622 
623  clGetDeviceFunctionPointer_fn FuncT = nullptr;
626  cast<pi_context>(CLContext), &FuncT);
627 
628  pi_result pi_ret_err = PI_SUCCESS;
629 
630  // Check if kernel name exists, to prevent opencl runtime throwing exception
631  // with cpu runtime
632  // TODO: Use fallback search method if extension does not exist once CPU
633  // runtime no longer throws exceptions and prints messages when given
634  // unavailable functions.
635  *function_pointer_ret = 0;
636  size_t Size;
637  cl_int Res =
638  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
639  0, nullptr, &Size);
640  if (Res != CL_SUCCESS)
641  return cast<pi_result>(Res);
642 
643  std::string ClResult(Size, ' ');
644  ret_err =
645  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
646  ClResult.size(), &ClResult[0], nullptr);
647  if (Res != CL_SUCCESS)
648  return cast<pi_result>(Res);
649 
650  // Get rid of the null terminator and search for kernel_name
651  // If function cannot be found return error code to indicate it
652  // exists
653  ClResult.pop_back();
654  if (!is_in_separated_string(ClResult, ';', func_name))
655  return PI_INVALID_KERNEL_NAME;
656 
658 
659  // If clGetDeviceFunctionPointer is in list of extensions
660  if (FuncT) {
661  pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
662  cast<cl_program>(program), func_name,
663  function_pointer_ret));
664  // GPU runtime sometimes returns PI_INVALID_ARG_VALUE if func address cannot
665  // be found even if kernel exits. As the kernel does exist return that the
666  // address is not available
667  if (pi_ret_err == CL_INVALID_ARG_VALUE) {
668  *function_pointer_ret = 0;
670  }
671  }
672  return pi_ret_err;
673 }
674 
676  pi_uint32 num_devices, const pi_device *devices,
677  void (*pfn_notify)(const char *errinfo,
678  const void *private_info,
679  size_t cb, void *user_data1),
680  void *user_data, pi_context *retcontext) {
682  *retcontext = cast<pi_context>(
683  clCreateContext(properties, cast<cl_uint>(num_devices),
684  cast<const cl_device_id *>(devices), pfn_notify,
685  user_data, cast<cl_int *>(&ret)));
686 
687  return ret;
688 }
689 
691  pi_uint32 num_devices,
692  const pi_device *devices,
693  bool ownNativeHandle,
694  pi_context *piContext) {
695  (void)num_devices;
696  (void)devices;
697  (void)ownNativeHandle;
698  assert(piContext != nullptr);
699  assert(ownNativeHandle == false);
700  *piContext = reinterpret_cast<pi_context>(nativeHandle);
701  return PI_SUCCESS;
702 }
703 
705  void *host_ptr, pi_mem *ret_mem,
706  const pi_mem_properties *properties) {
708  if (properties) {
709  // TODO: need to check if all properties are supported by OpenCL RT and
710  // ignore unsupported
711  clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
712  // First we need to look up the function pointer
714  clCreateBufferWithPropertiesINTEL_fn>(
715  context, &FuncPtr);
716  if (FuncPtr) {
717  *ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
718  cast<cl_mem_flags>(flags), size, host_ptr,
719  cast<cl_int *>(&ret_err)));
720  return ret_err;
721  }
722  }
723 
724  *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
725  cast<cl_mem_flags>(flags), size,
726  host_ptr, cast<cl_int *>(&ret_err)));
727  return ret_err;
728 }
729 
731  const pi_image_format *image_format,
732  const pi_image_desc *image_desc, void *host_ptr,
733  pi_mem *ret_mem) {
735  *ret_mem = cast<pi_mem>(
736  clCreateImage(cast<cl_context>(context), cast<cl_mem_flags>(flags),
737  cast<const cl_image_format *>(image_format),
738  cast<const cl_image_desc *>(image_desc), host_ptr,
739  cast<cl_int *>(&ret_err)));
740 
741  return ret_err;
742 }
743 
745  pi_buffer_create_type buffer_create_type,
746  void *buffer_create_info, pi_mem *ret_mem) {
747 
749  *ret_mem = cast<pi_mem>(
750  clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
751  cast<cl_buffer_create_type>(buffer_create_type),
752  buffer_create_info, cast<cl_int *>(&ret_err)));
753  return ret_err;
754 }
755 
758  bool ownNativeHandle, pi_mem *piMem) {
759  (void)context;
760  (void)ownNativeHandle;
761  assert(piMem != nullptr);
762  *piMem = reinterpret_cast<pi_mem>(nativeHandle);
763  return PI_SUCCESS;
764 }
765 
767  const char **strings,
768  const size_t *lengths,
769  pi_program *ret_program) {
770 
772  *ret_program = cast<pi_program>(
773  clCreateProgramWithSource(cast<cl_context>(context), cast<cl_uint>(count),
774  strings, lengths, cast<cl_int *>(&ret_err)));
775  return ret_err;
776 }
777 
779  pi_context context, pi_uint32 num_devices, const pi_device *device_list,
780  const size_t *lengths, const unsigned char **binaries,
781  size_t num_metadata_entries, const pi_device_binary_property *metadata,
782  pi_int32 *binary_status, pi_program *ret_program) {
783  (void)metadata;
784  (void)num_metadata_entries;
785 
787  *ret_program = cast<pi_program>(clCreateProgramWithBinary(
788  cast<cl_context>(context), cast<cl_uint>(num_devices),
789  cast<const cl_device_id *>(device_list), lengths, binaries,
790  cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
791  return ret_err;
792 }
793 
795  const pi_device *device_list, const char *options,
796  pi_uint32 num_input_programs,
797  const pi_program *input_programs,
798  void (*pfn_notify)(pi_program program, void *user_data),
799  void *user_data, pi_program *ret_program) {
800 
802  *ret_program = cast<pi_program>(
803  clLinkProgram(cast<cl_context>(context), cast<cl_uint>(num_devices),
804  cast<const cl_device_id *>(device_list), options,
805  cast<cl_uint>(num_input_programs),
806  cast<const cl_program *>(input_programs),
807  cast<void (*)(cl_program, void *)>(pfn_notify), user_data,
808  cast<cl_int *>(&ret_err)));
809  return ret_err;
810 }
811 
812 pi_result piKernelCreate(pi_program program, const char *kernel_name,
813  pi_kernel *ret_kernel) {
814 
816  *ret_kernel = cast<pi_kernel>(clCreateKernel(
817  cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
818  return ret_err;
819 }
820 
822  pi_kernel_group_info param_name,
823  size_t param_value_size, void *param_value,
824  size_t *param_value_size_ret) {
825  if (kernel == nullptr) {
826  return PI_INVALID_KERNEL;
827  }
828 
829  switch (param_name) {
831  return PI_INVALID_VALUE;
832  default:
833  cl_int result = clGetKernelWorkGroupInfo(
834  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
835  cast<cl_kernel_work_group_info>(param_name), param_value_size,
836  param_value, param_value_size_ret);
837  return static_cast<pi_result>(result);
838  }
839 }
840 
842  pi_kernel_sub_group_info param_name,
843  size_t input_value_size,
844  const void *input_value,
845  size_t param_value_size, void *param_value,
846  size_t *param_value_size_ret) {
847  (void)param_value_size;
848  size_t ret_val;
849  cl_int ret_err;
850  ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
851  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
852  cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
853  sizeof(size_t), &ret_val, param_value_size_ret));
854 
855  if (ret_err != CL_SUCCESS)
856  return cast<pi_result>(ret_err);
857 
858  *(static_cast<uint32_t *>(param_value)) = static_cast<uint32_t>(ret_val);
859  if (param_value_size_ret)
860  *param_value_size_ret = sizeof(uint32_t);
861  return PI_SUCCESS;
862 }
863 
865 
867  *ret_event = cast<pi_event>(
868  clCreateUserEvent(cast<cl_context>(context), cast<cl_int *>(&ret_err)));
869  return ret_err;
870 }
871 
874  bool ownNativeHandle,
875  pi_event *piEvent) {
876  (void)context;
877  // TODO: ignore this, but eventually want to return error as unsupported
878  (void)ownNativeHandle;
879 
880  assert(piEvent != nullptr);
881  assert(nativeHandle);
882  assert(context);
883 
884  *piEvent = reinterpret_cast<pi_event>(nativeHandle);
885  return PI_SUCCESS;
886 }
887 
889  pi_bool blocking_map, pi_map_flags map_flags,
890  size_t offset, size_t size,
891  pi_uint32 num_events_in_wait_list,
892  const pi_event *event_wait_list,
893  pi_event *event, void **ret_map) {
894 
896  *ret_map = cast<void *>(clEnqueueMapBuffer(
897  cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
898  cast<cl_bool>(blocking_map), map_flags, offset, size,
899  cast<cl_uint>(num_events_in_wait_list),
900  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event),
901  cast<cl_int *>(&ret_err)));
902  return ret_err;
903 }
904 
905 //
906 // USM
907 //
908 
917  pi_usm_mem_properties *properties, size_t size,
918  pi_uint32 alignment) {
919 
920  void *Ptr = nullptr;
922 
923  // First we need to look up the function pointer
924  clHostMemAllocINTEL_fn FuncPtr = nullptr;
925  RetVal = getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
926  context, &FuncPtr);
927 
928  if (FuncPtr) {
929  Ptr = FuncPtr(cast<cl_context>(context),
930  cast<cl_mem_properties_intel *>(properties), size, alignment,
931  cast<cl_int *>(&RetVal));
932  }
933 
934  *result_ptr = Ptr;
935 
936  // ensure we aligned the allocation correctly
937  if (RetVal == PI_SUCCESS && alignment != 0)
938  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
939  "allocation not aligned correctly");
940 
941  return RetVal;
942 }
943 
954  pi_usm_mem_properties *properties, size_t size,
955  pi_uint32 alignment) {
956 
957  void *Ptr = nullptr;
959 
960  // First we need to look up the function pointer
961  clDeviceMemAllocINTEL_fn FuncPtr = nullptr;
962  RetVal =
963  getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
964  context, &FuncPtr);
965 
966  if (FuncPtr) {
967  Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
968  cast<cl_mem_properties_intel *>(properties), size, alignment,
969  cast<cl_int *>(&RetVal));
970  }
971 
972  *result_ptr = Ptr;
973 
974  // ensure we aligned the allocation correctly
975  if (RetVal == PI_SUCCESS && alignment != 0)
976  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
977  "allocation not aligned correctly");
978 
979  return RetVal;
980 }
981 
992  pi_usm_mem_properties *properties, size_t size,
993  pi_uint32 alignment) {
994 
995  void *Ptr = nullptr;
997 
998  // First we need to look up the function pointer
999  clSharedMemAllocINTEL_fn FuncPtr = nullptr;
1000  RetVal =
1001  getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
1002  context, &FuncPtr);
1003 
1004  if (FuncPtr) {
1005  Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1006  cast<cl_mem_properties_intel *>(properties), size, alignment,
1007  cast<cl_int *>(&RetVal));
1008  }
1009 
1010  *result_ptr = Ptr;
1011 
1012  assert(alignment == 0 ||
1013  (RetVal == PI_SUCCESS &&
1014  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
1015  return RetVal;
1016 }
1017 
1023  // Use a blocking free to avoid issues with indirect access from kernels that
1024  // might be still running.
1025  clMemBlockingFreeINTEL_fn FuncPtr = nullptr;
1026 
1027  // We need to use clMemBlockingFreeINTEL here, however, due to a bug in OpenCL
1028  // CPU runtime this call fails with CL_INVALID_EVENT on CPU devices in certain
1029  // cases. As a temporary workaround, this function replicates caching of
1030  // extension function pointers in getExtFuncFromContext, while choosing
1031  // clMemBlockingFreeINTEL for GPU and clMemFreeINTEL for other device types.
1032  // TODO remove this workaround when the new OpenCL CPU runtime version is
1033  // uplifted in CI.
1034  static_assert(
1035  std::is_same<clMemBlockingFreeINTEL_fn, clMemFreeINTEL_fn>::value);
1036  cl_uint deviceCount;
1037  cl_int ret_err =
1038  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
1039  sizeof(cl_uint), &deviceCount, nullptr);
1040 
1041  if (ret_err != CL_SUCCESS || deviceCount < 1) {
1042  return PI_INVALID_CONTEXT;
1043  }
1044 
1045  std::vector<cl_device_id> devicesInCtx(deviceCount);
1046  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
1047  deviceCount * sizeof(cl_device_id),
1048  devicesInCtx.data(), nullptr);
1049 
1050  if (ret_err != CL_SUCCESS) {
1051  return PI_INVALID_CONTEXT;
1052  }
1053 
1054  bool useBlockingFree = true;
1055  for (const cl_device_id &dev : devicesInCtx) {
1056  cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
1057  ret_err = clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(cl_device_type),
1058  &devType, nullptr);
1059  if (ret_err != CL_SUCCESS) {
1060  return PI_INVALID_DEVICE;
1061  }
1062  useBlockingFree &= devType == CL_DEVICE_TYPE_GPU;
1063  }
1064 
1066  if (useBlockingFree)
1067  RetVal =
1068  getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
1069  context, &FuncPtr);
1070  else
1071  RetVal = getExtFuncFromContext<clMemFreeName, clMemFreeINTEL_fn>(context,
1072  &FuncPtr);
1073 
1074  if (FuncPtr) {
1075  RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr));
1076  }
1077 
1078  return RetVal;
1079 }
1080 
1089  size_t arg_size, const void *arg_value) {
1090  (void)arg_size;
1091 
1092  // Size is unused in CL as pointer args are passed by value.
1093 
1094  // Have to look up the context from the kernel
1095  cl_context CLContext;
1096  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
1097  sizeof(cl_context), &CLContext, nullptr);
1098  if (CLErr != CL_SUCCESS) {
1099  return cast<pi_result>(CLErr);
1100  }
1101 
1102  clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr;
1104  clSetKernelArgMemPointerINTEL_fn>(
1105  cast<pi_context>(CLContext), &FuncPtr);
1106 
1107  if (FuncPtr) {
1108  // OpenCL passes pointers by value not by reference
1109  // This means we need to deref the arg to get the pointer value
1110  auto PtrToPtr = reinterpret_cast<const intptr_t *>(arg_value);
1111  auto DerefPtr = reinterpret_cast<void *>(*PtrToPtr);
1112  RetVal =
1113  cast<pi_result>(FuncPtr(cast<cl_kernel>(kernel), arg_index, DerefPtr));
1114  }
1115 
1116  return RetVal;
1117 }
1118 
1130  size_t count, pi_uint32 num_events_in_waitlist,
1131  const pi_event *events_waitlist,
1132  pi_event *event) {
1133 
1134  // Have to look up the context from the kernel
1135  cl_context CLContext;
1136  cl_int CLErr =
1137  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1138  sizeof(cl_context), &CLContext, nullptr);
1139  if (CLErr != CL_SUCCESS) {
1140  return cast<pi_result>(CLErr);
1141  }
1142 
1143  clEnqueueMemsetINTEL_fn FuncPtr = nullptr;
1144  pi_result RetVal =
1145  getExtFuncFromContext<clEnqueueMemsetName, clEnqueueMemsetINTEL_fn>(
1146  cast<pi_context>(CLContext), &FuncPtr);
1147 
1148  if (FuncPtr) {
1149  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue), ptr, value,
1150  count, num_events_in_waitlist,
1151  cast<const cl_event *>(events_waitlist),
1152  cast<cl_event *>(event)));
1153  }
1154 
1155  return RetVal;
1156 }
1157 
1169  const void *src_ptr, size_t size,
1170  pi_uint32 num_events_in_waitlist,
1171  const pi_event *events_waitlist,
1172  pi_event *event) {
1173 
1174  // Have to look up the context from the kernel
1175  cl_context CLContext;
1176  cl_int CLErr =
1177  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1178  sizeof(cl_context), &CLContext, nullptr);
1179  if (CLErr != CL_SUCCESS) {
1180  return cast<pi_result>(CLErr);
1181  }
1182 
1183  clEnqueueMemcpyINTEL_fn FuncPtr = nullptr;
1184  pi_result RetVal =
1185  getExtFuncFromContext<clEnqueueMemcpyName, clEnqueueMemcpyINTEL_fn>(
1186  cast<pi_context>(CLContext), &FuncPtr);
1187 
1188  if (FuncPtr) {
1189  RetVal = cast<pi_result>(
1190  FuncPtr(cast<cl_command_queue>(queue), blocking, dst_ptr, src_ptr, size,
1191  num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1192  cast<cl_event *>(event)));
1193  }
1194 
1195  return RetVal;
1196 }
1197 
1207 pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
1208  pi_usm_migration_flags flags,
1209  pi_uint32 num_events_in_waitlist,
1210  const pi_event *events_waitlist,
1211  pi_event *event) {
1212  (void)ptr;
1213  (void)size;
1214 
1215  // flags is currently unused so fail if set
1216  if (flags != 0)
1217  return PI_INVALID_VALUE;
1218 
1219  return cast<pi_result>(clEnqueueMarkerWithWaitList(
1220  cast<cl_command_queue>(queue), num_events_in_waitlist,
1221  cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
1222 
1223  /*
1224  // Use this once impls support it.
1225  // Have to look up the context from the kernel
1226  cl_context CLContext;
1227  cl_int CLErr =
1228  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1229  sizeof(cl_context), &CLContext, nullptr);
1230  if (CLErr != CL_SUCCESS) {
1231  return cast<pi_result>(CLErr);
1232  }
1233 
1234  clEnqueueMigrateMemINTEL_fn FuncPtr;
1235  pi_result Err = getExtFuncFromContext<clEnqueueMigrateMemINTEL_fn>(
1236  cast<pi_context>(CLContext), "clEnqueueMigrateMemINTEL", &FuncPtr);
1237 
1238  if (Err != PI_SUCCESS) {
1239  RetVal = Err;
1240  } else {
1241  RetVal = cast<pi_result>(FuncPtr(
1242  cast<cl_command_queue>(queue), ptr, size, flags, num_events_in_waitlist,
1243  reinterpret_cast<const cl_event *>(events_waitlist),
1244  reinterpret_cast<cl_event *>(event)));
1245  }
1246  */
1247 }
1248 
1256 // USM memadvise API to govern behavior of automatic migration mechanisms
1258  size_t length, pi_mem_advice advice,
1259  pi_event *event) {
1260  (void)ptr;
1261  (void)length;
1262  (void)advice;
1263 
1264  return cast<pi_result>(
1265  clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,
1266  reinterpret_cast<cl_event *>(event)));
1267 
1268  /*
1269  // Change to use this once drivers support it.
1270 
1271  // Have to look up the context from the kernel
1272  cl_context CLContext;
1273  cl_int CLErr = clGetCommandQueueInfo(cast<cl_command_queue>(queue),
1274  CL_QUEUE_CONTEXT,
1275  sizeof(cl_context),
1276  &CLContext, nullptr);
1277  if (CLErr != CL_SUCCESS) {
1278  return cast<pi_result>(CLErr);
1279  }
1280 
1281  clEnqueueMemAdviseINTEL_fn FuncPtr;
1282  pi_result Err =
1283  getExtFuncFromContext<clEnqueueMemAdviseINTEL_fn>(
1284  cast<pi_context>(CLContext), "clEnqueueMemAdviseINTEL", &FuncPtr);
1285 
1286  if (Err != PI_SUCCESS) {
1287  RetVal = Err;
1288  } else {
1289  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue),
1290  ptr, length, advice, 0, nullptr,
1291  reinterpret_cast<cl_event *>(event)));
1292  }
1293  */
1294 }
1295 
1313  pi_mem_alloc_info param_name,
1314  size_t param_value_size, void *param_value,
1315  size_t *param_value_size_ret) {
1316 
1317  clGetMemAllocInfoINTEL_fn FuncPtr = nullptr;
1318  pi_result RetVal =
1319  getExtFuncFromContext<clGetMemAllocInfoName, clGetMemAllocInfoINTEL_fn>(
1320  context, &FuncPtr);
1321 
1322  if (FuncPtr) {
1323  RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr, param_name,
1324  param_value_size, param_value,
1325  param_value_size_ret));
1326  }
1327 
1328  return RetVal;
1329 }
1330 
1343  size_t param_value_size,
1344  const void *param_value) {
1345  if (param_name == PI_USM_INDIRECT_ACCESS &&
1346  *(static_cast<const pi_bool *>(param_value)) == PI_TRUE) {
1347  return USMSetIndirectAccess(kernel);
1348  } else {
1349  return cast<pi_result>(clSetKernelExecInfo(
1350  cast<cl_kernel>(kernel), param_name, param_value_size, param_value));
1351  }
1352 }
1353 
1354 typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
1355  cl_program program, cl_uint spec_id, size_t spec_size,
1356  const void *spec_value);
1357 
1359  pi_uint32 spec_id,
1360  size_t spec_size,
1361  const void *spec_value) {
1362  cl_program ClProg = cast<cl_program>(prog);
1363  cl_context Ctx = nullptr;
1364  size_t RetSize = 0;
1365  cl_int Res =
1366  clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize);
1367 
1368  if (Res != CL_SUCCESS)
1369  return cast<pi_result>(Res);
1370 
1373  decltype(F)>(cast<pi_context>(Ctx), &F);
1374 
1375  if (!F || Res != CL_SUCCESS)
1376  return PI_INVALID_OPERATION;
1377  Res = F(ClProg, spec_id, spec_size, spec_value);
1378  return cast<pi_result>(Res);
1379 }
1380 
1387 static pi_result piextGetNativeHandle(void *piObj,
1388  pi_native_handle *nativeHandle) {
1389  assert(nativeHandle != nullptr);
1390  *nativeHandle = reinterpret_cast<pi_native_handle>(piObj);
1391  return PI_SUCCESS;
1392 }
1393 
1395  pi_native_handle *nativeHandle) {
1396  return piextGetNativeHandle(platform, nativeHandle);
1397 }
1398 
1400  pi_native_handle *nativeHandle) {
1401  return piextGetNativeHandle(device, nativeHandle);
1402 }
1403 
1405  pi_native_handle *nativeHandle) {
1406  return piextGetNativeHandle(context, nativeHandle);
1407 }
1408 
1410  pi_native_handle *nativeHandle) {
1411  return piextGetNativeHandle(queue, nativeHandle);
1412 }
1413 
1415  return piextGetNativeHandle(mem, nativeHandle);
1416 }
1417 
1419  pi_native_handle *nativeHandle) {
1420  return piextGetNativeHandle(program, nativeHandle);
1421 }
1422 
1424  pi_native_handle *nativeHandle) {
1425  return piextGetNativeHandle(kernel, nativeHandle);
1426 }
1427 
1428 // This API is called by Sycl RT to notify the end of the plugin lifetime.
1429 // TODO: add a global variable lifetime management code here (see
1430 // pi_level_zero.cpp for reference) Currently this is just a NOOP.
1431 pi_result piTearDown(void *PluginParameter) {
1432  (void)PluginParameter;
1433  return PI_SUCCESS;
1434 }
1435 
1437  int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion);
1438  if (CompareVersions < 0) {
1439  // PI interface supports lower version of PI.
1440  // TODO: Take appropriate actions.
1441  return PI_INVALID_OPERATION;
1442  }
1443 
1444  // PI interface supports higher version or the same version.
1445  size_t PluginVersionSize = sizeof(PluginInit->PluginVersion);
1446  if (strlen(SupportedVersion) >= PluginVersionSize)
1447  return PI_INVALID_VALUE;
1448  strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize);
1449 
1450 #define _PI_CL(pi_api, ocl_api) \
1451  (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
1452 
1453  // Platform
1455  _PI_CL(piPlatformGetInfo, clGetPlatformInfo)
1459  // Device
1462  _PI_CL(piDevicePartition, clCreateSubDevices)
1463  _PI_CL(piDeviceRetain, clRetainDevice)
1464  _PI_CL(piDeviceRelease, clReleaseDevice)
1469  // Context
1471  _PI_CL(piContextGetInfo, clGetContextInfo)
1472  _PI_CL(piContextRetain, clRetainContext)
1473  _PI_CL(piContextRelease, clReleaseContext)
1476  // Queue
1478  _PI_CL(piQueueGetInfo, clGetCommandQueueInfo)
1479  _PI_CL(piQueueFinish, clFinish)
1480  _PI_CL(piQueueFlush, clFlush)
1481  _PI_CL(piQueueRetain, clRetainCommandQueue)
1482  _PI_CL(piQueueRelease, clReleaseCommandQueue)
1485  // Memory
1488  _PI_CL(piMemGetInfo, clGetMemObjectInfo)
1489  _PI_CL(piMemImageGetInfo, clGetImageInfo)
1490  _PI_CL(piMemRetain, clRetainMemObject)
1491  _PI_CL(piMemRelease, clReleaseMemObject)
1495  // Program
1499  _PI_CL(piProgramGetInfo, clGetProgramInfo)
1500  _PI_CL(piProgramCompile, clCompileProgram)
1501  _PI_CL(piProgramBuild, clBuildProgram)
1503  _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo)
1504  _PI_CL(piProgramRetain, clRetainProgram)
1505  _PI_CL(piProgramRelease, clReleaseProgram)
1510  // Kernel
1512  _PI_CL(piKernelSetArg, clSetKernelArg)
1513  _PI_CL(piKernelGetInfo, clGetKernelInfo)
1516  _PI_CL(piKernelRetain, clRetainKernel)
1517  _PI_CL(piKernelRelease, clReleaseKernel)
1522  // Event
1524  _PI_CL(piEventGetInfo, clGetEventInfo)
1525  _PI_CL(piEventGetProfilingInfo, clGetEventProfilingInfo)
1526  _PI_CL(piEventsWait, clWaitForEvents)
1527  _PI_CL(piEventSetCallback, clSetEventCallback)
1528  _PI_CL(piEventSetStatus, clSetUserEventStatus)
1529  _PI_CL(piEventRetain, clRetainEvent)
1530  _PI_CL(piEventRelease, clReleaseEvent)
1533  // Sampler
1535  _PI_CL(piSamplerGetInfo, clGetSamplerInfo)
1536  _PI_CL(piSamplerRetain, clRetainSampler)
1537  _PI_CL(piSamplerRelease, clReleaseSampler)
1538  // Queue commands
1539  _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel)
1540  _PI_CL(piEnqueueNativeKernel, clEnqueueNativeKernel)
1541  _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList)
1542  _PI_CL(piEnqueueEventsWaitWithBarrier, clEnqueueBarrierWithWaitList)
1543  _PI_CL(piEnqueueMemBufferRead, clEnqueueReadBuffer)
1544  _PI_CL(piEnqueueMemBufferReadRect, clEnqueueReadBufferRect)
1545  _PI_CL(piEnqueueMemBufferWrite, clEnqueueWriteBuffer)
1546  _PI_CL(piEnqueueMemBufferWriteRect, clEnqueueWriteBufferRect)
1547  _PI_CL(piEnqueueMemBufferCopy, clEnqueueCopyBuffer)
1548  _PI_CL(piEnqueueMemBufferCopyRect, clEnqueueCopyBufferRect)
1549  _PI_CL(piEnqueueMemBufferFill, clEnqueueFillBuffer)
1550  _PI_CL(piEnqueueMemImageRead, clEnqueueReadImage)
1551  _PI_CL(piEnqueueMemImageWrite, clEnqueueWriteImage)
1552  _PI_CL(piEnqueueMemImageCopy, clEnqueueCopyImage)
1553  _PI_CL(piEnqueueMemImageFill, clEnqueueFillImage)
1555  _PI_CL(piEnqueueMemUnmap, clEnqueueUnmapMemObject)
1556  // USM
1566 
1571 
1572 #undef _PI_CL
1573 
1574  return PI_SUCCESS;
1575 }
1576 
1577 } // end extern 'C'
piclProgramCreateWithSource
pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *ret_program)
Definition: pi_opencl.cpp:766
piContextCreate
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data1), void *user_data, pi_context *retcontext)
Definition: pi_opencl.cpp:675
piEventGetProfilingInfo
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1391
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:207
piEventRelease
pi_result piEventRelease(pi_event event)
Definition: pi_esimd_emulator.cpp:1442
PI_INVALID_KERNEL
@ PI_INVALID_KERNEL
Definition: pi.h:89
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1759
piextPlatformGetNativeHandle
pi_result piextPlatformGetNativeHandle(pi_platform platform, pi_native_handle *nativeHandle)
Gets the native handle of a PI platform object.
Definition: pi_opencl.cpp:1394
piMemGetInfo
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1081
piextProgramGetNativeHandle
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_opencl.cpp:1418
PI_SUCCESS
@ PI_SUCCESS
Definition: pi.h:86
clGetMemAllocInfoName
CONSTFIX char clGetMemAllocInfoName[]
Definition: pi_opencl.cpp:66
pi.h
piKernelSetArg
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Definition: pi_esimd_emulator.cpp:1353
piPluginInit
pi_result piPluginInit(pi_plugin *PluginInit)
Definition: pi_opencl.cpp:1436
piextProgramCreateWithNativeHandle
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, bool, pi_program *piProgram)
Creates PI program object from a native handle.
Definition: pi_opencl.cpp:521
PI_DEVICE_INFO_GPU_EU_COUNT
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:307
piMemBufferCreate
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
Definition: pi_opencl.cpp:704
pi_bool
pi_uint32 pi_bool
Definition: pi.h:74
T
cl::sycl::info::device
device
Definition: info_desc.hpp:53
piPlatformsGet
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
Definition: pi_opencl.cpp:275
CHECK_ERR_SET_NULL_RET
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)
Definition: pi_opencl.cpp:32
piKernelGetGroupInfo
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:821
PI_INVALID_OPERATION
@ PI_INVALID_OPERATION
Definition: pi.h:88
piextGetDeviceFunctionPointer
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret)
Retrieves a device function pointer to a user-defined function.
Definition: pi_opencl.cpp:611
clCreateBufferWithPropertiesName
CONSTFIX char clCreateBufferWithPropertiesName[]
Definition: pi_opencl.cpp:61
PI_DEVICE_INFO_IMAGE_SRGB
@ PI_DEVICE_INFO_IMAGE_SRGB
Definition: pi.h:313
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1336
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_opencl.cpp:1358
_pi_plugin
Definition: pi.h:1822
piDevicePartition
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Definition: pi_esimd_emulator.cpp:813
is_in_separated_string
static bool is_in_separated_string(const std::string &str, char delimiter, const std::string &sub_str)
Definition: pi_opencl.cpp:586
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:459
piEnqueueMemBufferCopy
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1570
clSharedMemAllocName
CONSTFIX char clSharedMemAllocName[]
Definition: pi_opencl.cpp:58
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
Definition: pi.h:319
piDevicesGet
pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
Definition: pi_opencl.cpp:298
_pi_result
_pi_result
Definition: pi.h:85
piTearDown
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_opencl.cpp:1431
pi_context_properties
intptr_t pi_context_properties
Definition: pi.h:541
piEnqueueMemUnmap
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1636
PI_SAMPLER_INFO_FILTER_MODE
@ PI_SAMPLER_INFO_FILTER_MODE
Definition: pi.h:522
piextKernelGetNativeHandle
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_opencl.cpp:1423
piSamplerGetInfo
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1476
piEventCreate
pi_result piEventCreate(pi_context context, pi_event *ret_event)
Definition: pi_opencl.cpp:864
piextUSMEnqueueMemset
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_opencl.cpp:1129
piPlatformGetInfo
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:439
PI_DEVICE_INFO_GPU_SLICES
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:309
piextUSMDeviceAlloc
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_opencl.cpp:952
piDeviceRetain
pi_result piDeviceRetain(pi_device device)
Definition: pi_esimd_emulator.cpp:571
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
_pi_device_type
_pi_device_type
Definition: pi.h:174
cl::sycl::info::device_type
device_type
Definition: info_desc.hpp:180
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:858
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:970
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_opencl.cpp:778
CONSTFIX
#define CONSTFIX
Definition: pi_opencl.cpp:52
piMemBufferPartition
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_opencl.cpp:744
cl::sycl::info::kernel
kernel
Definition: info_desc.hpp:236
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:311
piextUSMEnqueueMemAdvise
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_opencl.cpp:1257
piextMemCreateWithNativeHandle
pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *piMem)
Creates PI mem object from a native handle.
Definition: pi_opencl.cpp:756
cl::sycl::cl_bool
bool cl_bool
Definition: aliases.hpp:77
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1327
piextUSMHostAlloc
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_opencl.cpp:916
piextEventCreateWithNativeHandle
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *piEvent)
Creates PI event object from a native handle.
Definition: pi_opencl.cpp:872
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
_pi_platform
A PI platform stores all known PI devices, in the CUDA plugin this is just a vector of available devi...
Definition: pi_cuda.hpp:63
piEnqueueMemImageFill
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1747
_pi_sampler_addressing_mode
_pi_sampler_addressing_mode
Definition: pi.h:528
piKernelRelease
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1383
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:753
cast
To cast(From value)
Definition: pi_opencl.cpp:42
_pi_device_info
_pi_device_info
Definition: pi.h:198
SupportedVersion
const char SupportedVersion[]
Definition: pi_opencl.cpp:39
cl::sycl::info::queue
queue
Definition: info_desc.hpp:229
MaxMessageSize
constexpr size_t MaxMessageSize
Definition: pi_opencl.cpp:75
piEventSetStatus
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
Definition: pi_esimd_emulator.cpp:1430
piextContextGetNativeHandle
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
Definition: pi_opencl.cpp:1404
PI_SAMPLER_FILTER_MODE_NEAREST
@ PI_SAMPLER_FILTER_MODE_NEAREST
Definition: pi.h:537
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:626
piQueueCreate
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Definition: pi_opencl.cpp:393
piextContextCreateWithNativeHandle
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
Creates PI context object from a native handle.
Definition: pi_opencl.cpp:690
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1308
piextDeviceCreateWithNativeHandle
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform, pi_device *piDevice)
Creates PI device object from a native handle.
Definition: pi_opencl.cpp:386
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
_pi_sampler_filter_mode
_pi_sampler_filter_mode
Definition: pi.h:536
_pi_buffer_create_type
_pi_buffer_create_type
Definition: pi.h:510
clMemBlockingFreeName
CONSTFIX char clMemBlockingFreeName[]
Definition: pi_opencl.cpp:60
clGetDeviceFunctionPointerName
CONSTFIX char clGetDeviceFunctionPointerName[]
Definition: pi_opencl.cpp:69
piEnqueueMemBufferMap
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_opencl.cpp:888
piextPlatformCreateWithNativeHandle
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform *platform)
Creates PI platform object from a native handle.
Definition: pi_opencl.cpp:290
piextUSMEnqueuePrefetch
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_opencl.cpp:1207
cl::sycl::length
float length(T p) __NOEXC
Definition: builtins.hpp:1032
piEventsWait
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_esimd_emulator.cpp:1407
piContextRelease
pi_result piContextRelease(pi_context context)
Definition: pi_esimd_emulator.cpp:888
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:378
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1338
pi_uint32
uint32_t pi_uint32
Definition: pi.h:72
piEnqueueMemBufferReadRect
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1548
piMemRelease
pi_result piMemRelease(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1093
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:798
piEnqueueMemBufferCopyRect
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1576
piKernelGetSubGroupInfo
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
Definition: pi_opencl.cpp:841
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1331
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_opencl.cpp:195
clGetDeviceFunctionPointer_fn
CL_API_ENTRY cl_int(CL_API_CALL * clGetDeviceFunctionPointer_fn)(cl_device_id device, cl_program program, const char *FuncName, cl_ulong *ret_ptr)
Definition: pi_opencl.cpp:608
cl.h
_pi_kernel_exec_info
_pi_kernel_exec_info
Definition: pi.h:1325
clMemFreeName
CONSTFIX char clMemFreeName[]
Definition: pi_opencl.cpp:59
piEnqueueMemBufferWrite
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1556
pi_mem_flags
pi_bitfield pi_mem_flags
Definition: pi.h:582
piextGetNativeHandle
static pi_result piextGetNativeHandle(void *piObj, pi_native_handle *nativeHandle)
Common API for getting the native handle of a PI object.
Definition: pi_opencl.cpp:1387
piEnqueueMemImageRead
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1678
clDeviceMemAllocName
CONSTFIX char clDeviceMemAllocName[]
Definition: pi_opencl.cpp:57
ErrorMessageCode
thread_local pi_result ErrorMessageCode
Definition: pi_opencl.cpp:76
cl::sycl::host_ptr
multi_ptr< ElementType, access::address_space::global_host_space > host_ptr
Definition: pointers.hpp:32
PI_KERNEL_GROUP_INFO_NUM_REGS
@ PI_KERNEL_GROUP_INFO_NUM_REGS
Definition: pi.h:378
ErrorMessage
thread_local char ErrorMessage[MaxMessageSize]
Definition: pi_opencl.cpp:77
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:962
piEnqueueEventsWaitWithBarrier
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1490
getExtFuncFromContext
static pi_result getExtFuncFromContext(pi_context context, T *fptr)
Definition: pi_opencl.cpp:95
piKernelRetain
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1381
piEventSetCallback
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
piEnqueueMemBufferFill
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1584
piSamplerRetain
pi_result piSamplerRetain(pi_sampler sampler)
Definition: pi_esimd_emulator.cpp:1481
piMemImageGetInfo
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1674
piextEventGetNativeHandle
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_esimd_emulator.cpp:1463
pi_uint64
uint64_t pi_uint64
Definition: pi.h:73
_pi_device_binary_property_struct
Definition: pi.h:701
pi_mem_properties
pi_bitfield pi_mem_properties
Definition: pi.h:600
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:571
PI_INVALID_KERNEL_NAME
@ PI_INVALID_KERNEL_NAME
Definition: pi.h:87
_pi_sampler
Implementation of samplers for CUDA.
Definition: pi_cuda.hpp:797
piDeviceRelease
pi_result piDeviceRelease(pi_device device)
Definition: pi_esimd_emulator.cpp:581
PI_DEVICE_INFO_UUID
@ PI_DEVICE_INFO_UUID
Definition: pi.h:304
piextUSMSharedAlloc
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_opencl.cpp:990
cl::sycl::cl_ulong
std::uint64_t cl_ulong
Definition: aliases.hpp:85
PI_DEVICE_INFO_PCI_ADDRESS
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:306
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_opencl.cpp:315
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:76
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_opencl.cpp:1342
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:984
piContextRetain
pi_result piContextRetain(pi_context context)
Definition: pi_esimd_emulator.cpp:878
piPluginGetLastError
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
Definition: pi_opencl.cpp:88
pi_sampler_properties
pi_bitfield pi_sampler_properties
Definition: pi.h:549
clEnqueueMemsetName
CONSTFIX char clEnqueueMemsetName[]
Definition: pi_opencl.cpp:64
clSetProgramSpecializationConstant_fn
CL_API_ENTRY cl_int(CL_API_CALL * clSetProgramSpecializationConstant_fn)(cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value)
Definition: pi_opencl.cpp:1354
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_opencl.cpp:444
_pi_image_format
Definition: pi.h:931
PI_INVALID_CONTEXT
@ PI_INVALID_CONTEXT
Definition: pi.h:92
clHostMemAllocName
CONSTFIX char clHostMemAllocName[]
Definition: pi_opencl.cpp:56
PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE
@ PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE
PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE indicates a fallback method determines the function exists but i...
Definition: pi.h:127
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:318
clSetKernelArgMemPointerName
CONSTFIX char clSetKernelArgMemPointerName[]
Definition: pi_opencl.cpp:63
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:324
PI_PROGRAM_INFO_KERNEL_NAMES
@ PI_PROGRAM_INFO_KERNEL_NAMES
Definition: pi.h:337
setErrorMessage
static void setErrorMessage(const char *message, pi_result error_code)
Definition: pi_opencl.cpp:80
USMSetIndirectAccess
static pi_result USMSetIndirectAccess(pi_kernel kernel)
Enables indirect access of pointers in kernels.
Definition: pi_opencl.cpp:153
piEnqueueMemBufferRead
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1495
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:91
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:315
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1366
_PI_H_VERSION_STRING
#define _PI_H_VERSION_STRING
Definition: pi.h:55
piEventGetInfo
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1387
piEnqueueEventsWait
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1485
piMemRetain
pi_result piMemRetain(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1085
cl::sycl::info::event
event
Definition: info_desc.hpp:289
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
Definition: pi.h:312
cl::sycl::cl_int
std::int32_t cl_int
Definition: aliases.hpp:82
piextUSMGetMemAllocInfo
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_opencl.cpp:1312
pi_queue_properties
pi_bitfield pi_queue_properties
Definition: pi.h:623
PI_DEVICE_INFO_ATOMIC_64
@ PI_DEVICE_INFO_ATOMIC_64
Definition: pi.h:316
piEventRetain
pi_result piEventRetain(pi_event event)
Definition: pi_esimd_emulator.cpp:1432
_pi_image_desc
Definition: pi.h:936
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:958
piextKernelSetArgMemObj
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
Definition: pi_opencl.cpp:560
_pi_event
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:460
piextMemGetNativeHandle
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_opencl.cpp:1414
PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
Definition: pi.h:310
piextDeviceGetNativeHandle
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
Definition: pi_opencl.cpp:1399
piEnqueueMemBufferWriteRect
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1562
piSamplerCreate
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
Definition: pi_opencl.cpp:529
_pi_plugin::PluginVersion
char PluginVersion[10]
Definition: pi.h:1832
piextKernelCreateWithNativeHandle
pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_program, bool, pi_kernel *piKernel)
Creates PI kernel object from a native handle.
Definition: pi_opencl.cpp:574
cl::sycl::cl_uint
std::uint32_t cl_uint
Definition: aliases.hpp:83
piMemImageCreate
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_opencl.cpp:730
piQueueFlush
pi_result piQueueFlush(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:991
_pi_usm_migration_flags
_pi_usm_migration_flags
Definition: pi.h:1658
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
Definition: pi.h:748
piEnqueueNativeKernel
pi_result piEnqueueNativeKernel(pi_queue queue, void(*user_func)(void *), void *args, size_t cb_args, pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1829
cl::sycl::info::platform
platform
Definition: info_desc.hpp:31
piextUSMEnqueueMemcpy
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_opencl.cpp:1168
clSetProgramSpecializationConstantName
CONSTFIX char clSetProgramSpecializationConstantName[]
Definition: pi_opencl.cpp:67
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:752
cl::sycl::info::context
context
Definition: info_desc.hpp:42
piextQueueCreateWithNativeHandle
pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_device, bool ownNativeHandle, pi_queue *piQueue)
Creates PI queue object from a native handle.
Definition: pi_opencl.cpp:433
_PI_CL
#define _PI_CL(pi_api, ocl_api)
piEnqueueMemImageCopy
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1741
_pi_kernel_group_info
_pi_kernel_group_info
Definition: pi.h:368
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:794
pfn_notify
void(* pfn_notify)(pi_event event, pi_int32 eventCommandStatus, void *userData)
Definition: pi_cuda.hpp:456
piextKernelSetArgSampler
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
Definition: pi_opencl.cpp:567
piextKernelSetArgPointer
pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Sets up pointer arguments for CL kernels.
Definition: pi_opencl.cpp:1088
piSamplerRelease
pi_result piSamplerRelease(pi_sampler sampler)
Definition: pi_esimd_emulator.cpp:1483
PI_SAMPLER_INFO_NORMALIZED_COORDS
@ PI_SAMPLER_INFO_NORMALIZED_COORDS
Definition: pi.h:520
_pi_mem_alloc_info
_pi_mem_alloc_info
Definition: pi.h:1641
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:514
piEnqueueMemImageWrite
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1735
pi_usm_mem_properties
pi_bitfield pi_usm_mem_properties
Definition: pi.h:607
pi_map_flags
pi_bitfield pi_map_flags
Definition: pi.h:592
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:751
PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
Definition: pi.h:308
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_opencl.cpp:812
PI_INVALID_BINARY
@ PI_INVALID_BINARY
Definition: pi.h:95
piextQueueGetNativeHandle
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle)
Gets the native handle of a PI queue object.
Definition: pi_opencl.cpp:1409
PI_INVALID_DEVICE
@ PI_INVALID_DEVICE
Definition: pi.h:94
clEnqueueMemcpyName
CONSTFIX char clEnqueueMemcpyName[]
Definition: pi_opencl.cpp:65
piextUSMFree
pi_result piextUSMFree(pi_context context, void *ptr)
Frees allocated USM memory.
Definition: pi_opencl.cpp:1022
pi_int32
int32_t pi_int32
Definition: pi.h:71
_pi_context
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:150
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:73
_pi_plugin::PiVersion
char PiVersion[10]
Definition: pi.h:1830
_pi_kernel_sub_group_info
_pi_kernel_sub_group_info
Definition: pi.h:401
PI_SAMPLER_INFO_ADDRESSING_MODE
@ PI_SAMPLER_INFO_ADDRESSING_MODE
Definition: pi.h:521
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:317
PI_SAMPLER_ADDRESSING_MODE_CLAMP
@ PI_SAMPLER_ADDRESSING_MODE_CLAMP
Definition: pi.h:532