DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_opencl.cpp
Go to the documentation of this file.
1 //==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
10 
16 
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
18 
19 #include <pi_opencl.hpp>
20 #include <sycl/detail/cl.h>
22 #include <sycl/detail/pi.h>
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <cstring>
27 #include <limits>
28 #include <map>
29 #include <memory>
30 #include <sstream>
31 #include <string>
32 #include <vector>
33 
34 #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \
35  if (err != CL_SUCCESS) { \
36  if (ptr != nullptr) \
37  *ptr = nullptr; \
38  return cast<pi_result>(reterr); \
39  }
40 
41 // Want all the needed casts be explicit, do not define conversion operators.
42 template <class To, class From> To cast(From value) {
43  // TODO: see if more sanity checks are possible.
44  static_assert(sizeof(From) == sizeof(To), "cast failed size check");
45  return (To)(value);
46 }
47 
48 // Older versions of GCC don't like "const" here
49 #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2))
50 #define CONSTFIX constexpr
51 #else
52 #define CONSTFIX const
53 #endif
54 
55 // Names of USM functions that are queried from OpenCL
56 CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
57 CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
58 CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
59 CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL";
61  "clCreateBufferWithPropertiesINTEL";
62 CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL";
63 CONSTFIX char clEnqueueMemsetName[] = "clEnqueueMemsetINTEL";
64 CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL";
65 CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL";
67  "clSetProgramSpecializationConstant";
69  "clGetDeviceFunctionPointerINTEL";
71  "clEnqueueWriteGlobalVariableINTEL";
73  "clEnqueueReadGlobalVariableINTEL";
74 
75 #undef CONSTFIX
76 
77 // Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
78 constexpr size_t MaxMessageSize = 256;
79 thread_local pi_result ErrorMessageCode = PI_SUCCESS;
80 thread_local char ErrorMessage[MaxMessageSize];
81 
82 // Utility function for setting a message and warning
83 [[maybe_unused]] static void setErrorMessage(const char *message,
84  pi_result error_code) {
85  assert(strlen(message) <= MaxMessageSize);
86  strcpy(ErrorMessage, message);
87  ErrorMessageCode = error_code;
88 }
89 
90 // Returns plugin specific error and warning messages
92  *message = &ErrorMessage[0];
93  return ErrorMessageCode;
94 }
95 
96 static cl_int getPlatformVersion(cl_platform_id plat,
97  OCLV::OpenCLVersion &version) {
98  cl_int ret_err = CL_INVALID_VALUE;
99 
100  size_t platVerSize = 0;
101  ret_err =
102  clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize);
103 
104  std::string platVer(platVerSize, '\0');
105  ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize,
106  platVer.data(), nullptr);
107 
108  if (ret_err != CL_SUCCESS)
109  return ret_err;
110 
111  version = OCLV::OpenCLVersion(platVer);
112  if (!version.isValid())
113  return CL_INVALID_PLATFORM;
114 
115  return ret_err;
116 }
117 
118 static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) {
119  cl_int ret_err = CL_INVALID_VALUE;
120 
121  size_t devVerSize = 0;
122  ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize);
123 
124  std::string devVer(devVerSize, '\0');
125  ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(),
126  nullptr);
127 
128  if (ret_err != CL_SUCCESS)
129  return ret_err;
130 
131  version = OCLV::OpenCLVersion(devVer);
132  if (!version.isValid())
133  return CL_INVALID_DEVICE;
134 
135  return ret_err;
136 }
137 
138 static cl_int checkDeviceExtensions(cl_device_id dev,
139  const std::vector<std::string> &exts,
140  bool &supported) {
141  cl_int ret_err = CL_INVALID_VALUE;
142 
143  size_t extSize = 0;
144  ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize);
145 
146  std::string extStr(extSize, '\0');
147  ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(),
148  nullptr);
149 
150  if (ret_err != CL_SUCCESS)
151  return ret_err;
152 
153  supported = true;
154  for (const std::string &ext : exts)
155  if (!(supported = (extStr.find(ext) != std::string::npos)))
156  break;
157 
158  return ret_err;
159 }
160 
161 // USM helper function to get an extension function pointer
162 template <const char *FuncName, typename T>
163 static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
164  // TODO
165  // Potentially redo caching as PI interface changes.
166  thread_local static std::map<pi_context, T> FuncPtrs;
167 
168  // if cached, return cached FuncPtr
169  auto It = FuncPtrs.find(context);
170  if (It != FuncPtrs.end()) {
171  auto F = It->second;
172  // if cached that extension is not available return nullptr and
173  // PI_ERROR_INVALID_VALUE
174  *fptr = F;
175  return F ? PI_SUCCESS : PI_ERROR_INVALID_VALUE;
176  }
177 
178  cl_uint deviceCount;
179  cl_int ret_err =
180  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
181  sizeof(cl_uint), &deviceCount, nullptr);
182 
183  if (ret_err != CL_SUCCESS || deviceCount < 1) {
184  return PI_ERROR_INVALID_CONTEXT;
185  }
186 
187  std::vector<cl_device_id> devicesInCtx(deviceCount);
188  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
189  deviceCount * sizeof(cl_device_id),
190  devicesInCtx.data(), nullptr);
191 
192  if (ret_err != CL_SUCCESS) {
193  return PI_ERROR_INVALID_CONTEXT;
194  }
195 
196  cl_platform_id curPlatform;
197  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
198  sizeof(cl_platform_id), &curPlatform, nullptr);
199 
200  if (ret_err != CL_SUCCESS) {
201  return PI_ERROR_INVALID_CONTEXT;
202  }
203 
204  T FuncPtr =
205  (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
206 
207  if (!FuncPtr) {
208  // Cache that the extension is not available
209  FuncPtrs[context] = nullptr;
210  return PI_ERROR_INVALID_VALUE;
211  }
212 
213  *fptr = FuncPtr;
214  FuncPtrs[context] = FuncPtr;
215 
216  return cast<pi_result>(ret_err);
217 }
218 
224  // We test that each alloc type is supported before we actually try to
225  // set KernelExecInfo.
226  cl_bool TrueVal = CL_TRUE;
227  clHostMemAllocINTEL_fn HFunc = nullptr;
228  clSharedMemAllocINTEL_fn SFunc = nullptr;
229  clDeviceMemAllocINTEL_fn DFunc = nullptr;
230  cl_context CLContext;
231  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
232  sizeof(cl_context), &CLContext, nullptr);
233  if (CLErr != CL_SUCCESS) {
234  return cast<pi_result>(CLErr);
235  }
236 
237  getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
238  cast<pi_context>(CLContext), &HFunc);
239  if (HFunc) {
240  clSetKernelExecInfo(cast<cl_kernel>(kernel),
241  CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
242  sizeof(cl_bool), &TrueVal);
243  }
244 
245  getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
246  cast<pi_context>(CLContext), &DFunc);
247  if (DFunc) {
248  clSetKernelExecInfo(cast<cl_kernel>(kernel),
249  CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
250  sizeof(cl_bool), &TrueVal);
251  }
252 
253  getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
254  cast<pi_context>(CLContext), &SFunc);
255  if (SFunc) {
256  clSetKernelExecInfo(cast<cl_kernel>(kernel),
257  CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
258  sizeof(cl_bool), &TrueVal);
259  }
260  return PI_SUCCESS;
261 }
262 
263 extern "C" {
264 
266  size_t paramValueSize, void *paramValue,
267  size_t *paramValueSizeRet) {
268  switch (paramName) {
269  // TODO: Check regularly to see if support in enabled in OpenCL.
270  // Intel GPU EU device-specific information extensions.
271  // Some of the queries are enabled by cl_intel_device_attribute_query
272  // extension, but it's not yet in the Registry.
281  // TODO: Check if device UUID extension is enabled in OpenCL.
282  // For details about Intel UUID extension, see
283  // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
284  case PI_DEVICE_INFO_UUID:
285  return PI_ERROR_INVALID_VALUE;
287  // This query is missing beore OpenCL 3.0
288  // Check version and handle appropriately
289  OCLV::OpenCLVersion devVer;
290  cl_device_id deviceID = cast<cl_device_id>(device);
291  cl_int ret_err = getDeviceVersion(deviceID, devVer);
292  if (ret_err != CL_SUCCESS) {
293  return cast<pi_result>(ret_err);
294  }
295 
296  // Minimum required capability to be returned
297  // For OpenCL 1.2, this is all that is required
299 
300  if (devVer >= OCLV::V3_0) {
301  // For OpenCL >=3.0, the query should be implemented
302  cl_device_atomic_capabilities cl_capabilities = 0;
303  cl_int ret_err = clGetDeviceInfo(
304  deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
305  sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr);
306  if (ret_err != CL_SUCCESS)
307  return cast<pi_result>(ret_err);
308 
309  // Mask operation to only consider atomic_memory_order* capabilities
310  cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
311  CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
312  CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
313  cl_capabilities &= mask;
314 
315  // The memory order capabilities are hierarchical, if one is implied, all
316  // preceding capbilities are implied as well. Especially in the case of
317  // ACQ_REL.
318  if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
319  capabilities |= PI_MEMORY_ORDER_SEQ_CST;
320  }
321  if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
324  }
325  } else if (devVer >= OCLV::V2_0) {
326  // For OpenCL 2.x, return all capabilities
327  // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model)
330  }
331 
332  if (paramValue) {
333  if (paramValueSize < sizeof(pi_memory_order_capabilities))
334  return static_cast<pi_result>(CL_INVALID_VALUE);
335 
336  std::memcpy(paramValue, &capabilities, sizeof(capabilities));
337  }
338 
339  if (paramValueSizeRet)
340  *paramValueSizeRet = sizeof(capabilities);
341 
342  return static_cast<pi_result>(CL_SUCCESS);
343  }
345  // Initialize result to minimum mandated capabilities according to
346  // SYCL2020 4.6.3.2
347  // Because scopes are hierarchical, wider scopes support all narrower
348  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
349  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
353 
354  OCLV::OpenCLVersion devVer;
355 
356  cl_device_id deviceID = cast<cl_device_id>(device);
357  cl_int ret_err = getDeviceVersion(deviceID, devVer);
358  if (ret_err != CL_SUCCESS)
359  return static_cast<pi_result>(ret_err);
360 
361  cl_device_atomic_capabilities devCapabilities = 0;
362  if (devVer >= OCLV::V3_0) {
363  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
364  sizeof(cl_device_atomic_capabilities),
365  &devCapabilities, nullptr);
366  if (ret_err != CL_SUCCESS)
367  return static_cast<pi_result>(ret_err);
368  assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
369  "Violates minimum mandated guarantee");
370 
371  // Because scopes are hierarchical, wider scopes support all narrower
372  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
373  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
374  // We already initialized to these minimum mandated capabilities. Just
375  // check wider scopes.
376  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
377  result |= PI_MEMORY_SCOPE_DEVICE;
378  }
379 
380  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
381  result |= PI_MEMORY_SCOPE_SYSTEM;
382  }
383 
384  } else {
385  // This info is only available in OpenCL version >= 3.0
386  // Just return minimum mandated capabilities for older versions.
387  // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we
388  // already initialized using it.
389  if (devVer >= OCLV::V2_0) {
390  // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
391  // ALL_DEVICES
393  }
394  }
395  if (paramValue) {
396  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
397  return PI_ERROR_INVALID_VALUE;
398 
399  std::memcpy(paramValue, &result, sizeof(result));
400  }
401  if (paramValueSizeRet)
402  *paramValueSizeRet = sizeof(result);
403  return PI_SUCCESS;
404  }
406  // Initialize result to minimum mandated capabilities according to
407  // SYCL2020 4.6.3.2
411 
412  OCLV::OpenCLVersion devVer;
413 
414  cl_device_id deviceID = cast<cl_device_id>(device);
415  cl_int ret_err = getDeviceVersion(deviceID, devVer);
416  if (ret_err != CL_SUCCESS)
417  return static_cast<pi_result>(ret_err);
418 
419  cl_device_atomic_capabilities devCapabilities = 0;
420  if (devVer >= OCLV::V3_0) {
421  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
422  sizeof(cl_device_atomic_capabilities),
423  &devCapabilities, nullptr);
424  if (ret_err != CL_SUCCESS)
425  return static_cast<pi_result>(ret_err);
426  assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) &&
427  "Violates minimum mandated guarantee");
428  assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) &&
429  "Violates minimum mandated guarantee");
430 
431  // We already initialized to minimum mandated capabilities. Just
432  // check stronger orders.
433  if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
434  result |= PI_MEMORY_ORDER_SEQ_CST;
435  }
436 
437  } else {
438  // This info is only available in OpenCL version >= 3.0
439  // Just return minimum mandated capabilities for older versions.
440  // OpenCL 1.x minimum mandated capabilities are RELAXED | ACQ_REL, we
441  // already initialized using these.
442  if (devVer >= OCLV::V2_0) {
443  // OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL |
444  // SEQ_CST
445  result |= PI_MEMORY_ORDER_SEQ_CST;
446  }
447  }
448  if (paramValue) {
449  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
450  return PI_ERROR_INVALID_VALUE;
451 
452  std::memcpy(paramValue, &result, sizeof(result));
453  }
454  if (paramValueSizeRet)
455  *paramValueSizeRet = sizeof(result);
456  return PI_SUCCESS;
457  }
459  // Initialize result to minimum mandated capabilities according to
460  // SYCL2020 4.6.3.2.
461  // Because scopes are hierarchical, wider scopes support all narrower
462  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
463  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
467 
468  OCLV::OpenCLVersion devVer;
469 
470  cl_device_id deviceID = cast<cl_device_id>(device);
471  cl_int ret_err = getDeviceVersion(deviceID, devVer);
472  if (ret_err != CL_SUCCESS)
473  return static_cast<pi_result>(ret_err);
474 
475  cl_device_atomic_capabilities devCapabilities = 0;
476  if (devVer >= OCLV::V3_0) {
477  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
478  sizeof(cl_device_atomic_capabilities),
479  &devCapabilities, nullptr);
480  if (ret_err != CL_SUCCESS)
481  return static_cast<pi_result>(ret_err);
482  assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
483  "Violates minimum mandated guarantee");
484 
485  // Because scopes are hierarchical, wider scopes support all narrower
486  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
487  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
488  // We already initialized to these minimum mandated capabilities. Just
489  // check wider scopes.
490  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
491  result |= PI_MEMORY_SCOPE_DEVICE;
492  }
493 
494  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
495  result |= PI_MEMORY_SCOPE_SYSTEM;
496  }
497 
498  } else {
499  // This info is only available in OpenCL version >= 3.0
500  // Just return minimum mandated capabilities for older versions.
501  // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we
502  // already initialized using it.
503  if (devVer >= OCLV::V2_0) {
504  // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
505  // ALL_DEVICES
507  }
508  }
509  if (paramValue) {
510  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
511  return PI_ERROR_INVALID_VALUE;
512 
513  std::memcpy(paramValue, &result, sizeof(result));
514  }
515  if (paramValueSizeRet)
516  *paramValueSizeRet = sizeof(result);
517  return PI_SUCCESS;
518  }
520  cl_int ret_err = CL_SUCCESS;
521  cl_bool result = CL_FALSE;
522  bool supported = false;
523 
524  ret_err = checkDeviceExtensions(
525  cast<cl_device_id>(device),
526  {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"},
527  supported);
528  if (ret_err != CL_SUCCESS)
529  return static_cast<pi_result>(ret_err);
530 
531  result = supported;
532  std::memcpy(paramValue, &result, sizeof(cl_bool));
533  return PI_SUCCESS;
534  }
536  // bfloat16 math functions are not yet supported on Intel GPUs.
537  cl_bool result = false;
538  std::memcpy(paramValue, &result, sizeof(cl_bool));
539  return PI_SUCCESS;
540  }
542  cl_bool result = true;
543  std::memcpy(paramValue, &result, sizeof(cl_bool));
544  return PI_SUCCESS;
545  }
547  cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
548  cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
549  sizeof(cl_device_type), &devType, nullptr);
550 
551  // FIXME: here we assume that program built for a root GPU device can be
552  // used on its sub-devices without re-building
553  cl_bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
554  std::memcpy(paramValue, &result, sizeof(cl_bool));
555  return PI_SUCCESS;
556  }
558  // Returns the maximum sizes of a work group for each dimension one
559  // could use to submit a kernel. There is no such query defined in OpenCL
560  // so we'll return the maximum value.
561  {
562  if (paramValueSizeRet)
563  *paramValueSizeRet = paramValueSize;
564  static constexpr size_t Max = (std::numeric_limits<size_t>::max)();
565  size_t *out = cast<size_t *>(paramValue);
566  if (paramValueSize >= sizeof(size_t))
567  out[0] = Max;
568  if (paramValueSize >= 2 * sizeof(size_t))
569  out[1] = Max;
570  if (paramValueSize >= 3 * sizeof(size_t))
571  out[2] = Max;
572  return PI_SUCCESS;
573  }
575  pi_int32 result = 1;
576  std::memcpy(paramValue, &result, sizeof(pi_int32));
577  return PI_SUCCESS;
578  }
580  // Corresponding OpenCL query is only available starting with OpenCL 2.1 and
581  // we have to emulate it on older OpenCL runtimes.
582  OCLV::OpenCLVersion version;
583  cl_int err = getDeviceVersion(cast<cl_device_id>(device), version);
584  if (err != CL_SUCCESS)
585  return static_cast<pi_result>(err);
586 
587  if (version >= OCLV::V2_1) {
588  err = clGetDeviceInfo(cast<cl_device_id>(device),
589  cast<cl_device_info>(paramName), paramValueSize,
590  paramValue, paramValueSizeRet);
591  if (err != CL_SUCCESS)
592  return static_cast<pi_result>(err);
593 
594  if (paramValue && *static_cast<cl_uint *>(paramValue) == 0u) {
595  // OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 spec
596  // says that minimum possible value is 1.
597  cl_uint value = 1u;
598  std::memcpy(paramValue, &value, sizeof(cl_uint));
599  }
600 
601  return static_cast<pi_result>(err);
602  }
603 
604  // Otherwise, we can't query anything, because even cl_khr_subgroups does
605  // not provide similar query. Therefore, simply return minimum possible
606  // value 1 here.
607  if (paramValue && paramValueSize < sizeof(cl_uint))
608  return static_cast<pi_result>(CL_INVALID_VALUE);
609  if (paramValueSizeRet)
610  *paramValueSizeRet = sizeof(cl_uint);
611 
612  if (paramValue) {
613  cl_uint value = 1u;
614  std::memcpy(paramValue, &value, sizeof(cl_uint));
615  }
616 
617  return static_cast<pi_result>(CL_SUCCESS);
618  }
619  default:
620  cl_int result = clGetDeviceInfo(
621  cast<cl_device_id>(device), cast<cl_device_info>(paramName),
622  paramValueSize, paramValue, paramValueSizeRet);
623  return static_cast<pi_result>(result);
624  }
625 }
626 
628  pi_uint32 *num_platforms) {
629  cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
630  cast<cl_platform_id *>(platforms),
631  cast<cl_uint *>(num_platforms));
632 
633  // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms
634  if (result == CL_PLATFORM_NOT_FOUND_KHR) {
635  assert(num_platforms != 0);
636  *num_platforms = 0;
637  result = PI_SUCCESS;
638  }
639  return static_cast<pi_result>(result);
640 }
641 
643  pi_platform *platform) {
644  assert(platform);
645  assert(nativeHandle);
646  *platform = reinterpret_cast<pi_platform>(nativeHandle);
647  return PI_SUCCESS;
648 }
649 
651  pi_uint32 num_entries, pi_device *devices,
652  pi_uint32 *num_devices) {
653  cl_int result = clGetDeviceIDs(
654  cast<cl_platform_id>(platform), cast<cl_device_type>(device_type),
655  cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
656  cast<cl_uint *>(num_devices));
657 
658  // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices
659  if (result == CL_DEVICE_NOT_FOUND) {
660  assert(num_devices != 0);
661  *num_devices = 0;
662  result = PI_SUCCESS;
663  }
664  return cast<pi_result>(result);
665 }
666 
668  pi_uint32 num_images,
669  pi_uint32 *selected_image_ind) {
670 
671  // TODO: this is a bare-bones implementation for choosing a device image
672  // that would be compatible with the targeted device. An AOT-compiled
673  // image is preferred over SPIR-V for known devices (i.e. Intel devices)
674  // The implementation makes no effort to differentiate between multiple images
675  // for the given device, and simply picks the first one compatible
676  // Real implementation will use the same mechanism OpenCL ICD dispatcher
677  // uses. Something like:
678  // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT);
679  // return context->dispatch->piextDeviceSelectIR(
680  // ctx, images, num_images, selected_image);
681  // where context->dispatch is set to the dispatch table provided by PI
682  // plugin for platform/device the ctx was created for.
683 
684  // Choose the binary target for the provided device
685  const char *image_target = nullptr;
686  // Get the type of the device
687  cl_device_type device_type;
688  constexpr pi_uint32 invalid_ind = std::numeric_limits<pi_uint32>::max();
689  cl_int ret_err =
690  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
691  sizeof(cl_device_type), &device_type, nullptr);
692  if (ret_err != CL_SUCCESS) {
693  *selected_image_ind = invalid_ind;
694  return cast<pi_result>(ret_err);
695  }
696 
697  switch (device_type) {
698  // TODO: Factor out vendor specifics into a separate source
699  // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp?
700 
701  // We'll attempt to find an image that was AOT-compiled
702  // from a SPIR-V image into an image specific for:
703 
704  case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU
706  break;
707  case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU
709  break;
710  case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA
712  break;
713  default:
714  // Otherwise, we'll attempt to find and JIT-compile
715  // a device-independent SPIR-V image
717  break;
718  }
719 
720  // Find the appropriate device image, fallback to spirv if not found
721  pi_uint32 fallback = invalid_ind;
722  for (pi_uint32 i = 0; i < num_images; ++i) {
723  if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
724  *selected_image_ind = i;
725  return PI_SUCCESS;
726  }
727  if (strcmp(images[i]->DeviceTargetSpec,
729  fallback = i;
730  }
731  // Points to a spirv image, if such indeed was found
732  if ((*selected_image_ind = fallback) != invalid_ind)
733  return PI_SUCCESS;
734  // No image can be loaded for the given device
735  return PI_ERROR_INVALID_BINARY;
736 }
737 
739  pi_platform, pi_device *piDevice) {
740  assert(piDevice != nullptr);
741  *piDevice = reinterpret_cast<pi_device>(nativeHandle);
742  return PI_SUCCESS;
743 }
744 
746  pi_queue_properties *Properties, pi_queue *Queue) {
747  assert(Properties);
748  // Expect flags mask to be passed first.
749  assert(Properties[0] == PI_QUEUE_FLAGS);
750  if (Properties[0] != PI_QUEUE_FLAGS)
751  return PI_ERROR_INVALID_VALUE;
752  pi_queue_properties Flags = Properties[1];
753  // Extra data isn't supported yet.
754  assert(Properties[2] == 0);
755  if (Properties[2] != 0)
756  return PI_ERROR_INVALID_VALUE;
757  return piQueueCreate(Context, Device, Flags, Queue);
758 }
760  pi_queue_properties properties, pi_queue *queue) {
761  assert(queue && "piQueueCreate failed, queue argument is null");
762 
763  cl_platform_id curPlatform;
764  cl_int ret_err =
765  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_PLATFORM,
766  sizeof(cl_platform_id), &curPlatform, nullptr);
767 
768  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
769 
770  // Check that unexpected bits are not set.
771  assert(!(properties &
776 
777  // Properties supported by OpenCL backend.
778  cl_command_queue_properties SupportByOpenCL =
779  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE |
780  CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT;
781 
782  OCLV::OpenCLVersion version;
783  ret_err = getPlatformVersion(curPlatform, version);
784 
785  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
786 
787  if (version >= OCLV::V2_0) {
788  *queue = cast<pi_queue>(clCreateCommandQueue(
789  cast<cl_context>(context), cast<cl_device_id>(device),
790  cast<cl_command_queue_properties>(properties) & SupportByOpenCL,
791  &ret_err));
792  return cast<pi_result>(ret_err);
793  }
794 
795  cl_queue_properties CreationFlagProperties[] = {
796  CL_QUEUE_PROPERTIES,
797  cast<cl_command_queue_properties>(properties) & SupportByOpenCL, 0};
798  *queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
799  cast<cl_context>(context), cast<cl_device_id>(device),
800  CreationFlagProperties, &ret_err));
801  return cast<pi_result>(ret_err);
802 }
803 
805  size_t param_value_size, void *param_value,
806  size_t *param_value_size_ret) {
807  if (queue == nullptr) {
808  return PI_ERROR_INVALID_QUEUE;
809  }
810 
811  switch (param_name) {
813  // OpenCL doesn't provide API to check the status of the queue.
814  return PI_ERROR_INVALID_VALUE;
815  default:
816  cl_int CLErr = clGetCommandQueueInfo(
817  cast<cl_command_queue>(queue), cast<cl_command_queue_info>(param_name),
818  param_value_size, param_value, param_value_size_ret);
819  if (CLErr != CL_SUCCESS) {
820  return cast<pi_result>(CLErr);
821  }
822  }
823  return PI_SUCCESS;
824 }
825 
828  bool ownNativeHandle,
829  pi_queue *piQueue) {
830  (void)ownNativeHandle;
831  assert(piQueue != nullptr);
832  *piQueue = reinterpret_cast<pi_queue>(nativeHandle);
833  clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
834  return PI_SUCCESS;
835 }
836 
837 pi_result piProgramCreate(pi_context context, const void *il, size_t length,
838  pi_program *res_program) {
839  cl_uint deviceCount;
840  cl_int ret_err =
841  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
842  sizeof(cl_uint), &deviceCount, nullptr);
843 
844  std::vector<cl_device_id> devicesInCtx(deviceCount);
845 
846  if (ret_err != CL_SUCCESS || deviceCount < 1) {
847  if (res_program != nullptr)
848  *res_program = nullptr;
849  return cast<pi_result>(CL_INVALID_CONTEXT);
850  }
851 
852  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
853  deviceCount * sizeof(cl_device_id),
854  devicesInCtx.data(), nullptr);
855 
856  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
857 
858  cl_platform_id curPlatform;
859  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
860  sizeof(cl_platform_id), &curPlatform, nullptr);
861 
862  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
863 
864  OCLV::OpenCLVersion platVer;
865  ret_err = getPlatformVersion(curPlatform, platVer);
866 
867  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
868 
869  pi_result err = PI_SUCCESS;
870  if (platVer >= OCLV::V2_1) {
871 
872  /* Make sure all devices support CL 2.1 or newer as well. */
873  for (cl_device_id dev : devicesInCtx) {
874  OCLV::OpenCLVersion devVer;
875 
876  ret_err = getDeviceVersion(dev, devVer);
877  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
878 
879  /* If the device does not support CL 2.1 or greater, we need to make sure
880  * it supports the cl_khr_il_program extension.
881  */
882  if (devVer < OCLV::V2_1) {
883  bool supported = false;
884 
885  ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported);
886  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
887 
888  if (!supported)
889  return cast<pi_result>(CL_INVALID_OPERATION);
890  }
891  }
892  if (res_program != nullptr)
893  *res_program = cast<pi_program>(clCreateProgramWithIL(
894  cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
895  return err;
896  }
897 
898  /* If none of the devices conform with CL 2.1 or newer make sure they all
899  * support the cl_khr_il_program extension.
900  */
901  for (cl_device_id dev : devicesInCtx) {
902  bool supported = false;
903 
904  ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported);
905  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
906 
907  if (!supported)
908  return cast<pi_result>(CL_INVALID_OPERATION);
909  }
910 
911  using apiFuncT =
912  cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
913  apiFuncT funcPtr =
914  reinterpret_cast<apiFuncT>(clGetExtensionFunctionAddressForPlatform(
915  curPlatform, "clCreateProgramWithILKHR"));
916 
917  assert(funcPtr != nullptr);
918  if (res_program != nullptr)
919  *res_program = cast<pi_program>(
920  funcPtr(cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
921  else
922  err = PI_ERROR_INVALID_VALUE;
923 
924  return err;
925 }
926 
928  pi_context, bool,
929  pi_program *piProgram) {
930  assert(piProgram != nullptr);
931  *piProgram = reinterpret_cast<pi_program>(nativeHandle);
932  return PI_SUCCESS;
933 }
934 
936  const pi_sampler_properties *sampler_properties,
937  pi_sampler *result_sampler) {
938  // Initialize properties according to OpenCL 2.1 spec.
939  pi_result error_code;
940  pi_bool normalizedCoords = PI_TRUE;
943 
944  // Unpack sampler properties
945  for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
946  ++i) {
947  if (sampler_properties[i] == PI_SAMPLER_INFO_NORMALIZED_COORDS) {
948  normalizedCoords = static_cast<pi_bool>(sampler_properties[++i]);
949  } else if (sampler_properties[i] == PI_SAMPLER_INFO_ADDRESSING_MODE) {
950  addressingMode =
951  static_cast<pi_sampler_addressing_mode>(sampler_properties[++i]);
952  } else if (sampler_properties[i] == PI_SAMPLER_INFO_FILTER_MODE) {
953  filterMode = static_cast<pi_sampler_filter_mode>(sampler_properties[++i]);
954  } else {
955  assert(false && "Cannot recognize sampler property");
956  }
957  }
958 
959  // Always call OpenCL 1.0 API
960  *result_sampler = cast<pi_sampler>(
961  clCreateSampler(cast<cl_context>(context), normalizedCoords,
962  addressingMode, filterMode, cast<cl_int *>(&error_code)));
963  return error_code;
964 }
965 
967  const pi_mem *arg_value) {
968  return cast<pi_result>(
969  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
970  sizeof(arg_value), cast<const cl_mem *>(arg_value)));
971 }
972 
974  const pi_sampler *arg_value) {
975  return cast<pi_result>(
976  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
977  sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
978 }
979 
981  pi_context, pi_program, bool,
982  pi_kernel *piKernel) {
983  assert(piKernel != nullptr);
984  *piKernel = reinterpret_cast<pi_kernel>(nativeHandle);
985  return PI_SUCCESS;
986 }
987 
988 // Function gets characters between delimeter's in str
989 // then checks if they are equal to the sub_str.
990 // returns true if there is at least one instance
991 // returns false if there are no instances of the name
992 static bool is_in_separated_string(const std::string &str, char delimiter,
993  const std::string &sub_str) {
994  size_t beg = 0;
995  size_t length = 0;
996  for (const auto &x : str) {
997  if (x == delimiter) {
998  if (str.substr(beg, length) == sub_str)
999  return true;
1000 
1001  beg += length + 1;
1002  length = 0;
1003  continue;
1004  }
1005  length++;
1006  }
1007  if (length != 0)
1008  if (str.substr(beg, length) == sub_str)
1009  return true;
1010 
1011  return false;
1012 }
1013 
1014 typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
1015  cl_device_id device, cl_program program, const char *FuncName,
1016  cl_ulong *ret_ptr);
1018  const char *func_name,
1019  pi_uint64 *function_pointer_ret) {
1020 
1021  cl_context CLContext = nullptr;
1022  cl_int ret_err =
1023  clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
1024  sizeof(CLContext), &CLContext, nullptr);
1025 
1026  if (ret_err != CL_SUCCESS)
1027  return cast<pi_result>(ret_err);
1028 
1029  clGetDeviceFunctionPointer_fn FuncT = nullptr;
1032  cast<pi_context>(CLContext), &FuncT);
1033 
1034  pi_result pi_ret_err = PI_SUCCESS;
1035 
1036  // Check if kernel name exists, to prevent opencl runtime throwing exception
1037  // with cpu runtime
1038  // TODO: Use fallback search method if extension does not exist once CPU
1039  // runtime no longer throws exceptions and prints messages when given
1040  // unavailable functions.
1041  *function_pointer_ret = 0;
1042  size_t Size;
1043  cl_int Res =
1044  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
1045  0, nullptr, &Size);
1046  if (Res != CL_SUCCESS)
1047  return cast<pi_result>(Res);
1048 
1049  std::string ClResult(Size, ' ');
1050  Res =
1051  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
1052  ClResult.size(), &ClResult[0], nullptr);
1053  if (Res != CL_SUCCESS)
1054  return cast<pi_result>(Res);
1055 
1056  // Get rid of the null terminator and search for kernel_name
1057  // If function cannot be found return error code to indicate it
1058  // exists
1059  ClResult.pop_back();
1060  if (!is_in_separated_string(ClResult, ';', func_name))
1061  return PI_ERROR_INVALID_KERNEL_NAME;
1062 
1063  pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1064 
1065  // If clGetDeviceFunctionPointer is in list of extensions
1066  if (FuncT) {
1067  pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
1068  cast<cl_program>(program), func_name,
1069  function_pointer_ret));
1070  // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address
1071  // cannot be found even if kernel exits. As the kernel does exist return
1072  // that the address is not available
1073  if (pi_ret_err == CL_INVALID_ARG_VALUE) {
1074  *function_pointer_ret = 0;
1075  return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1076  }
1077  }
1078  return pi_ret_err;
1079 }
1080 
1082  pi_uint32 num_devices, const pi_device *devices,
1083  void (*pfn_notify)(const char *errinfo,
1084  const void *private_info,
1085  size_t cb, void *user_data1),
1086  void *user_data, pi_context *retcontext) {
1087  pi_result ret = PI_ERROR_INVALID_OPERATION;
1088  *retcontext = cast<pi_context>(
1089  clCreateContext(properties, cast<cl_uint>(num_devices),
1090  cast<const cl_device_id *>(devices), pfn_notify,
1091  user_data, cast<cl_int *>(&ret)));
1092 
1093  return ret;
1094 }
1095 
1097  pi_uint32 num_devices,
1098  const pi_device *devices,
1099  bool ownNativeHandle,
1100  pi_context *piContext) {
1101  (void)num_devices;
1102  (void)devices;
1103  (void)ownNativeHandle;
1104  assert(piContext != nullptr);
1105  assert(ownNativeHandle == false);
1106  *piContext = reinterpret_cast<pi_context>(nativeHandle);
1107  return PI_SUCCESS;
1108 }
1109 
1111  size_t paramValueSize, void *paramValue,
1112  size_t *paramValueSizeRet) {
1113  switch (paramName) {
1117  // 2D USM memops are not supported.
1118  cl_bool result = false;
1119  std::memcpy(paramValue, &result, sizeof(cl_bool));
1120  return PI_SUCCESS;
1121  }
1126  // These queries should be dealt with in context_impl.cpp by calling the
1127  // queries of each device separately and building the intersection set.
1128  setErrorMessage("These queries should have never come here.",
1129  PI_ERROR_INVALID_ARG_VALUE);
1130  return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
1131  }
1132  default:
1133  cl_int result = clGetContextInfo(
1134  cast<cl_context>(context), cast<cl_context_info>(paramName),
1135  paramValueSize, paramValue, paramValueSizeRet);
1136  return static_cast<pi_result>(result);
1137  }
1138 }
1139 
1141  void *host_ptr, pi_mem *ret_mem,
1142  const pi_mem_properties *properties) {
1143  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1144  if (properties) {
1145  // TODO: need to check if all properties are supported by OpenCL RT and
1146  // ignore unsupported
1147  clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
1148  // First we need to look up the function pointer
1150  clCreateBufferWithPropertiesINTEL_fn>(
1151  context, &FuncPtr);
1152  if (FuncPtr) {
1153  *ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
1154  cast<cl_mem_flags>(flags), size, host_ptr,
1155  cast<cl_int *>(&ret_err)));
1156  return ret_err;
1157  }
1158  }
1159 
1160  *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
1161  cast<cl_mem_flags>(flags), size,
1162  host_ptr, cast<cl_int *>(&ret_err)));
1163  return ret_err;
1164 }
1165 
1167  const pi_image_format *image_format,
1168  const pi_image_desc *image_desc, void *host_ptr,
1169  pi_mem *ret_mem) {
1170  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1171  *ret_mem = cast<pi_mem>(
1172  clCreateImage(cast<cl_context>(context), cast<cl_mem_flags>(flags),
1173  cast<const cl_image_format *>(image_format),
1174  cast<const cl_image_desc *>(image_desc), host_ptr,
1175  cast<cl_int *>(&ret_err)));
1176 
1177  return ret_err;
1178 }
1179 
1181  pi_buffer_create_type buffer_create_type,
1182  void *buffer_create_info, pi_mem *ret_mem) {
1183 
1184  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1185  *ret_mem = cast<pi_mem>(
1186  clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
1187  cast<cl_buffer_create_type>(buffer_create_type),
1188  buffer_create_info, cast<cl_int *>(&ret_err)));
1189  return ret_err;
1190 }
1191 
1193  pi_context context,
1194  bool ownNativeHandle, pi_mem *piMem) {
1195  (void)context;
1196  (void)ownNativeHandle;
1197  assert(piMem != nullptr);
1198  *piMem = reinterpret_cast<pi_mem>(nativeHandle);
1199  return PI_SUCCESS;
1200 }
1201 
1203  const char **strings,
1204  const size_t *lengths,
1205  pi_program *ret_program) {
1206 
1207  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1208  *ret_program = cast<pi_program>(
1209  clCreateProgramWithSource(cast<cl_context>(context), cast<cl_uint>(count),
1210  strings, lengths, cast<cl_int *>(&ret_err)));
1211  return ret_err;
1212 }
1213 
1215  pi_context context, pi_uint32 num_devices, const pi_device *device_list,
1216  const size_t *lengths, const unsigned char **binaries,
1217  size_t num_metadata_entries, const pi_device_binary_property *metadata,
1218  pi_int32 *binary_status, pi_program *ret_program) {
1219  (void)metadata;
1220  (void)num_metadata_entries;
1221 
1222  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1223  *ret_program = cast<pi_program>(clCreateProgramWithBinary(
1224  cast<cl_context>(context), cast<cl_uint>(num_devices),
1225  cast<const cl_device_id *>(device_list), lengths, binaries,
1226  cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
1227  return ret_err;
1228 }
1229 
1231  const pi_device *device_list, const char *options,
1232  pi_uint32 num_input_programs,
1233  const pi_program *input_programs,
1234  void (*pfn_notify)(pi_program program, void *user_data),
1235  void *user_data, pi_program *ret_program) {
1236 
1237  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1238  *ret_program = cast<pi_program>(
1239  clLinkProgram(cast<cl_context>(context), cast<cl_uint>(num_devices),
1240  cast<const cl_device_id *>(device_list), options,
1241  cast<cl_uint>(num_input_programs),
1242  cast<const cl_program *>(input_programs),
1243  cast<void (*)(cl_program, void *)>(pfn_notify), user_data,
1244  cast<cl_int *>(&ret_err)));
1245  return ret_err;
1246 }
1247 
1248 pi_result piKernelCreate(pi_program program, const char *kernel_name,
1249  pi_kernel *ret_kernel) {
1250 
1251  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1252  *ret_kernel = cast<pi_kernel>(clCreateKernel(
1253  cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
1254  return ret_err;
1255 }
1256 
1258  pi_kernel_group_info param_name,
1259  size_t param_value_size, void *param_value,
1260  size_t *param_value_size_ret) {
1261  if (kernel == nullptr) {
1262  return PI_ERROR_INVALID_KERNEL;
1263  }
1264 
1265  switch (param_name) {
1267  return PI_ERROR_INVALID_VALUE;
1268  default:
1269  cl_int result = clGetKernelWorkGroupInfo(
1270  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1271  cast<cl_kernel_work_group_info>(param_name), param_value_size,
1272  param_value, param_value_size_ret);
1273  return static_cast<pi_result>(result);
1274  }
1275 }
1276 
1278  pi_kernel_sub_group_info param_name,
1279  size_t input_value_size,
1280  const void *input_value,
1281  size_t param_value_size, void *param_value,
1282  size_t *param_value_size_ret) {
1283  (void)param_value_size;
1284  size_t ret_val;
1285  cl_int ret_err;
1286 
1287  std::shared_ptr<void> implicit_input_value;
1288  if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE && !input_value) {
1289  // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no
1290  // value is given we use the max work item size of the device in the first
1291  // dimention to avoid truncation of max sub-group size.
1292  pi_uint32 max_dims = 0;
1293  pi_result pi_ret_err =
1295  sizeof(pi_uint32), &max_dims, nullptr);
1296  if (pi_ret_err != PI_SUCCESS)
1297  return pi_ret_err;
1298  std::shared_ptr<size_t[]> WGSizes{new size_t[max_dims]};
1299  pi_ret_err =
1301  max_dims * sizeof(size_t), WGSizes.get(), nullptr);
1302  if (pi_ret_err != PI_SUCCESS)
1303  return pi_ret_err;
1304  for (size_t i = 1; i < max_dims; ++i)
1305  WGSizes.get()[i] = 1;
1306  implicit_input_value = std::move(WGSizes);
1307  input_value_size = max_dims * sizeof(size_t);
1308  input_value = implicit_input_value.get();
1309  }
1310 
1311  ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
1312  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1313  cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
1314  sizeof(size_t), &ret_val, param_value_size_ret));
1315 
1316  if (ret_err != CL_SUCCESS)
1317  return cast<pi_result>(ret_err);
1318 
1319  *(static_cast<uint32_t *>(param_value)) = static_cast<uint32_t>(ret_val);
1320  if (param_value_size_ret)
1321  *param_value_size_ret = sizeof(uint32_t);
1322  return PI_SUCCESS;
1323 }
1324 
1326 
1327  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1328  auto *cl_err = cast<cl_int *>(&ret_err);
1329 
1330  cl_event e = clCreateUserEvent(cast<cl_context>(context), cl_err);
1331  *ret_event = cast<pi_event>(e);
1332  if (*cl_err != CL_SUCCESS)
1333  return ret_err;
1334  *cl_err = clSetUserEventStatus(e, CL_COMPLETE);
1335  return ret_err;
1336 }
1337 
1339  pi_context context,
1340  bool ownNativeHandle,
1341  pi_event *piEvent) {
1342  (void)context;
1343  // TODO: ignore this, but eventually want to return error as unsupported
1344  (void)ownNativeHandle;
1345 
1346  assert(piEvent != nullptr);
1347  assert(nativeHandle);
1348  assert(context);
1349 
1350  *piEvent = reinterpret_cast<pi_event>(nativeHandle);
1351  return PI_SUCCESS;
1352 }
1353 
1355  pi_bool blocking_map, pi_map_flags map_flags,
1356  size_t offset, size_t size,
1357  pi_uint32 num_events_in_wait_list,
1358  const pi_event *event_wait_list,
1359  pi_event *event, void **ret_map) {
1360 
1361  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1362  *ret_map = cast<void *>(clEnqueueMapBuffer(
1363  cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
1364  cast<cl_bool>(blocking_map), map_flags, offset, size,
1365  cast<cl_uint>(num_events_in_wait_list),
1366  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event),
1367  cast<cl_int *>(&ret_err)));
1368  return ret_err;
1369 }
1370 
1371 //
1372 // USM
1373 //
1374 
1382 pi_result piextUSMHostAlloc(void **result_ptr, pi_context context,
1383  pi_usm_mem_properties *properties, size_t size,
1384  pi_uint32 alignment) {
1385 
1386  void *Ptr = nullptr;
1387  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1388 
1389  // First we need to look up the function pointer
1390  clHostMemAllocINTEL_fn FuncPtr = nullptr;
1391  RetVal = getExtFuncFromContext<clHostMemAllocName, clHostMemAllocINTEL_fn>(
1392  context, &FuncPtr);
1393 
1394  if (FuncPtr) {
1395  Ptr = FuncPtr(cast<cl_context>(context),
1396  cast<cl_mem_properties_intel *>(properties), size, alignment,
1397  cast<cl_int *>(&RetVal));
1398  }
1399 
1400  *result_ptr = Ptr;
1401 
1402  // ensure we aligned the allocation correctly
1403  if (RetVal == PI_SUCCESS && alignment != 0)
1404  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
1405  "allocation not aligned correctly");
1406 
1407  return RetVal;
1408 }
1409 
1418 pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context,
1419  pi_device device,
1420  pi_usm_mem_properties *properties, size_t size,
1421  pi_uint32 alignment) {
1422 
1423  void *Ptr = nullptr;
1424  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1425 
1426  // First we need to look up the function pointer
1427  clDeviceMemAllocINTEL_fn FuncPtr = nullptr;
1428  RetVal =
1429  getExtFuncFromContext<clDeviceMemAllocName, clDeviceMemAllocINTEL_fn>(
1430  context, &FuncPtr);
1431 
1432  if (FuncPtr) {
1433  Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1434  cast<cl_mem_properties_intel *>(properties), size, alignment,
1435  cast<cl_int *>(&RetVal));
1436  }
1437 
1438  *result_ptr = Ptr;
1439 
1440  // ensure we aligned the allocation correctly
1441  if (RetVal == PI_SUCCESS && alignment != 0)
1442  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
1443  "allocation not aligned correctly");
1444 
1445  return RetVal;
1446 }
1447 
1456 pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context,
1457  pi_device device,
1458  pi_usm_mem_properties *properties, size_t size,
1459  pi_uint32 alignment) {
1460 
1461  void *Ptr = nullptr;
1462  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1463 
1464  // First we need to look up the function pointer
1465  clSharedMemAllocINTEL_fn FuncPtr = nullptr;
1466  RetVal =
1467  getExtFuncFromContext<clSharedMemAllocName, clSharedMemAllocINTEL_fn>(
1468  context, &FuncPtr);
1469 
1470  if (FuncPtr) {
1471  Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1472  cast<cl_mem_properties_intel *>(properties), size, alignment,
1473  cast<cl_int *>(&RetVal));
1474  }
1475 
1476  *result_ptr = Ptr;
1477 
1478  assert(alignment == 0 ||
1479  (RetVal == PI_SUCCESS &&
1480  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
1481  return RetVal;
1482 }
1483 
1488 pi_result piextUSMFree(pi_context context, void *ptr) {
1489  // Use a blocking free to avoid issues with indirect access from kernels that
1490  // might be still running.
1491  clMemBlockingFreeINTEL_fn FuncPtr = nullptr;
1492 
1493  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1494  RetVal =
1495  getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
1496  context, &FuncPtr);
1497 
1498  if (FuncPtr) {
1499  RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr));
1500  }
1501 
1502  return RetVal;
1503 }
1504 
1513  size_t arg_size, const void *arg_value) {
1514  (void)arg_size;
1515 
1516  // Size is unused in CL as pointer args are passed by value.
1517 
1518  // Have to look up the context from the kernel
1519  cl_context CLContext;
1520  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
1521  sizeof(cl_context), &CLContext, nullptr);
1522  if (CLErr != CL_SUCCESS) {
1523  return cast<pi_result>(CLErr);
1524  }
1525 
1526  clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr;
1528  clSetKernelArgMemPointerINTEL_fn>(
1529  cast<pi_context>(CLContext), &FuncPtr);
1530 
1531  if (FuncPtr) {
1532  // OpenCL passes pointers by value not by reference
1533  // This means we need to deref the arg to get the pointer value
1534  auto PtrToPtr = reinterpret_cast<const intptr_t *>(arg_value);
1535  auto DerefPtr = reinterpret_cast<void *>(*PtrToPtr);
1536  RetVal =
1537  cast<pi_result>(FuncPtr(cast<cl_kernel>(kernel), arg_index, DerefPtr));
1538  }
1539 
1540  return RetVal;
1541 }
1542 
1554  size_t count, pi_uint32 num_events_in_waitlist,
1555  const pi_event *events_waitlist,
1556  pi_event *event) {
1557 
1558  // Have to look up the context from the kernel
1559  cl_context CLContext;
1560  cl_int CLErr =
1561  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1562  sizeof(cl_context), &CLContext, nullptr);
1563  if (CLErr != CL_SUCCESS) {
1564  return cast<pi_result>(CLErr);
1565  }
1566 
1567  clEnqueueMemsetINTEL_fn FuncPtr = nullptr;
1568  pi_result RetVal =
1569  getExtFuncFromContext<clEnqueueMemsetName, clEnqueueMemsetINTEL_fn>(
1570  cast<pi_context>(CLContext), &FuncPtr);
1571 
1572  if (FuncPtr) {
1573  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue), ptr, value,
1574  count, num_events_in_waitlist,
1575  cast<const cl_event *>(events_waitlist),
1576  cast<cl_event *>(event)));
1577  }
1578 
1579  return RetVal;
1580 }
1581 
1592 pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr,
1593  const void *src_ptr, size_t size,
1594  pi_uint32 num_events_in_waitlist,
1595  const pi_event *events_waitlist,
1596  pi_event *event) {
1597 
1598  // Have to look up the context from the kernel
1599  cl_context CLContext;
1600  cl_int CLErr =
1601  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1602  sizeof(cl_context), &CLContext, nullptr);
1603  if (CLErr != CL_SUCCESS) {
1604  return cast<pi_result>(CLErr);
1605  }
1606 
1607  clEnqueueMemcpyINTEL_fn FuncPtr = nullptr;
1608  pi_result RetVal =
1609  getExtFuncFromContext<clEnqueueMemcpyName, clEnqueueMemcpyINTEL_fn>(
1610  cast<pi_context>(CLContext), &FuncPtr);
1611 
1612  if (FuncPtr) {
1613  RetVal = cast<pi_result>(
1614  FuncPtr(cast<cl_command_queue>(queue), blocking, dst_ptr, src_ptr, size,
1615  num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1616  cast<cl_event *>(event)));
1617  }
1618 
1619  return RetVal;
1620 }
1621 
1631 pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
1632  pi_usm_migration_flags flags,
1633  pi_uint32 num_events_in_waitlist,
1634  const pi_event *events_waitlist,
1635  pi_event *event) {
1636  (void)ptr;
1637  (void)size;
1638 
1639  // flags is currently unused so fail if set
1640  if (flags != 0)
1641  return PI_ERROR_INVALID_VALUE;
1642 
1643  return cast<pi_result>(clEnqueueMarkerWithWaitList(
1644  cast<cl_command_queue>(queue), num_events_in_waitlist,
1645  cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
1646 
1647  /*
1648  // Use this once impls support it.
1649  // Have to look up the context from the kernel
1650  cl_context CLContext;
1651  cl_int CLErr =
1652  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1653  sizeof(cl_context), &CLContext, nullptr);
1654  if (CLErr != CL_SUCCESS) {
1655  return cast<pi_result>(CLErr);
1656  }
1657 
1658  clEnqueueMigrateMemINTEL_fn FuncPtr;
1659  pi_result Err = getExtFuncFromContext<clEnqueueMigrateMemINTEL_fn>(
1660  cast<pi_context>(CLContext), "clEnqueueMigrateMemINTEL", &FuncPtr);
1661 
1662  if (Err != PI_SUCCESS) {
1663  RetVal = Err;
1664  } else {
1665  RetVal = cast<pi_result>(FuncPtr(
1666  cast<cl_command_queue>(queue), ptr, size, flags, num_events_in_waitlist,
1667  reinterpret_cast<const cl_event *>(events_waitlist),
1668  reinterpret_cast<cl_event *>(event)));
1669  }
1670  */
1671 }
1672 
1680 // USM memadvise API to govern behavior of automatic migration mechanisms
1682  size_t length, pi_mem_advice advice,
1683  pi_event *event) {
1684  (void)ptr;
1685  (void)length;
1686  (void)advice;
1687 
1688  return cast<pi_result>(
1689  clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,
1690  reinterpret_cast<cl_event *>(event)));
1691 
1692  /*
1693  // Change to use this once drivers support it.
1694 
1695  // Have to look up the context from the kernel
1696  cl_context CLContext;
1697  cl_int CLErr = clGetCommandQueueInfo(cast<cl_command_queue>(queue),
1698  CL_QUEUE_CONTEXT,
1699  sizeof(cl_context),
1700  &CLContext, nullptr);
1701  if (CLErr != CL_SUCCESS) {
1702  return cast<pi_result>(CLErr);
1703  }
1704 
1705  clEnqueueMemAdviseINTEL_fn FuncPtr;
1706  pi_result Err =
1707  getExtFuncFromContext<clEnqueueMemAdviseINTEL_fn>(
1708  cast<pi_context>(CLContext), "clEnqueueMemAdviseINTEL", &FuncPtr);
1709 
1710  if (Err != PI_SUCCESS) {
1711  RetVal = Err;
1712  } else {
1713  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue),
1714  ptr, length, advice, 0, nullptr,
1715  reinterpret_cast<cl_event *>(event)));
1716  }
1717  */
1718 }
1719 
1732 __SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr,
1733  size_t pitch, size_t pattern_size,
1734  const void *pattern, size_t width,
1735  size_t height,
1736  pi_uint32 num_events_in_waitlist,
1737  const pi_event *events_waitlist,
1738  pi_event *event) {
1739  std::ignore = queue;
1740  std::ignore = ptr;
1741  std::ignore = pitch;
1742  std::ignore = pattern_size;
1743  std::ignore = pattern;
1744  std::ignore = width;
1745  std::ignore = height;
1746  std::ignore = num_events_in_waitlist;
1747  std::ignore = events_waitlist;
1748  std::ignore = event;
1749  return PI_ERROR_INVALID_OPERATION;
1750 }
1751 
1764  pi_queue queue, void *ptr, size_t pitch, int value, size_t width,
1765  size_t height, pi_uint32 num_events_in_waitlist,
1766  const pi_event *events_waitlist, pi_event *event) {
1767  std::ignore = queue;
1768  std::ignore = ptr;
1769  std::ignore = pitch;
1770  std::ignore = value;
1771  std::ignore = width;
1772  std::ignore = height;
1773  std::ignore = num_events_in_waitlist;
1774  std::ignore = events_waitlist;
1775  std::ignore = event;
1776  return PI_ERROR_INVALID_OPERATION;
1777 }
1778 
1794  pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch,
1795  const void *src_ptr, size_t src_pitch, size_t width, size_t height,
1796  pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist,
1797  pi_event *event) {
1798  std::ignore = queue;
1799  std::ignore = blocking;
1800  std::ignore = dst_ptr;
1801  std::ignore = dst_pitch;
1802  std::ignore = src_ptr;
1803  std::ignore = src_pitch;
1804  std::ignore = width;
1805  std::ignore = height;
1806  std::ignore = num_events_in_waitlist;
1807  std::ignore = events_waitlist;
1808  std::ignore = event;
1809  return PI_ERROR_INVALID_OPERATION;
1810 }
1811 
1829  pi_mem_alloc_info param_name,
1830  size_t param_value_size, void *param_value,
1831  size_t *param_value_size_ret) {
1832 
1833  clGetMemAllocInfoINTEL_fn FuncPtr = nullptr;
1834  pi_result RetVal =
1835  getExtFuncFromContext<clGetMemAllocInfoName, clGetMemAllocInfoINTEL_fn>(
1836  context, &FuncPtr);
1837 
1838  if (FuncPtr) {
1839  RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr, param_name,
1840  param_value_size, param_value,
1841  param_value_size_ret));
1842  }
1843 
1844  return RetVal;
1845 }
1846 
1847 typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueWriteGlobalVariable_fn)(
1848  cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t,
1849  const void *, cl_uint, const cl_event *, cl_event *);
1850 
1851 typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueReadGlobalVariable_fn)(
1852  cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, void *,
1853  cl_uint, const cl_event *, cl_event *);
1854 
1869  pi_queue queue, pi_program program, const char *name,
1870  pi_bool blocking_write, size_t count, size_t offset, const void *src,
1871  pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
1872  pi_event *event) {
1873  cl_context Ctx = nullptr;
1874  cl_int Res =
1875  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1876  sizeof(Ctx), &Ctx, nullptr);
1877 
1878  if (Res != CL_SUCCESS)
1879  return cast<pi_result>(Res);
1880 
1881  clEnqueueWriteGlobalVariable_fn F = nullptr;
1882  Res = getExtFuncFromContext<clEnqueueWriteGlobalVariableName, decltype(F)>(
1883  cast<pi_context>(Ctx), &F);
1884 
1885  if (!F || Res != CL_SUCCESS)
1886  return PI_ERROR_INVALID_OPERATION;
1887  Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
1888  blocking_write, count, offset, src, num_events_in_wait_list,
1889  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
1890  return cast<pi_result>(Res);
1891 }
1892 
1907  pi_queue queue, pi_program program, const char *name, pi_bool blocking_read,
1908  size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list,
1909  const pi_event *event_wait_list, pi_event *event) {
1910  cl_context Ctx = nullptr;
1911  cl_int Res =
1912  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1913  sizeof(Ctx), &Ctx, nullptr);
1914 
1915  if (Res != CL_SUCCESS)
1916  return cast<pi_result>(Res);
1917 
1918  clEnqueueReadGlobalVariable_fn F = nullptr;
1919  Res = getExtFuncFromContext<clEnqueueReadGlobalVariableName, decltype(F)>(
1920  cast<pi_context>(Ctx), &F);
1921 
1922  if (!F || Res != CL_SUCCESS)
1923  return PI_ERROR_INVALID_OPERATION;
1924  Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
1925  blocking_read, count, offset, dst, num_events_in_wait_list,
1926  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
1927  return cast<pi_result>(Res);
1928 }
1929 
1942  size_t param_value_size,
1943  const void *param_value) {
1944  if (param_name == PI_USM_INDIRECT_ACCESS &&
1945  *(static_cast<const pi_bool *>(param_value)) == PI_TRUE) {
1946  return USMSetIndirectAccess(kernel);
1947  } else {
1948  return cast<pi_result>(clSetKernelExecInfo(
1949  cast<cl_kernel>(kernel), param_name, param_value_size, param_value));
1950  }
1951 }
1952 
1953 typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetProgramSpecializationConstant_fn)(
1954  cl_program program, cl_uint spec_id, size_t spec_size,
1955  const void *spec_value);
1956 
1958  pi_uint32 spec_id,
1959  size_t spec_size,
1960  const void *spec_value) {
1961  cl_program ClProg = cast<cl_program>(prog);
1962  cl_context Ctx = nullptr;
1963  size_t RetSize = 0;
1964  cl_int Res =
1965  clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize);
1966 
1967  if (Res != CL_SUCCESS)
1968  return cast<pi_result>(Res);
1969 
1972  decltype(F)>(cast<pi_context>(Ctx), &F);
1973 
1974  if (!F || Res != CL_SUCCESS)
1975  return PI_ERROR_INVALID_OPERATION;
1976  Res = F(ClProg, spec_id, spec_size, spec_value);
1977  return cast<pi_result>(Res);
1978 }
1979 
1986 static pi_result piextGetNativeHandle(void *piObj,
1987  pi_native_handle *nativeHandle) {
1988  assert(nativeHandle != nullptr);
1989  *nativeHandle = reinterpret_cast<pi_native_handle>(piObj);
1990  return PI_SUCCESS;
1991 }
1992 
1994  pi_native_handle *nativeHandle) {
1995  return piextGetNativeHandle(platform, nativeHandle);
1996 }
1997 
1999  pi_native_handle *nativeHandle) {
2000  return piextGetNativeHandle(device, nativeHandle);
2001 }
2002 
2004  pi_native_handle *nativeHandle) {
2005  return piextGetNativeHandle(context, nativeHandle);
2006 }
2007 
2009  pi_native_handle *nativeHandle) {
2010  return piextGetNativeHandle(queue, nativeHandle);
2011 }
2012 
2014  return piextGetNativeHandle(mem, nativeHandle);
2015 }
2016 
2018  pi_native_handle *nativeHandle) {
2019  return piextGetNativeHandle(program, nativeHandle);
2020 }
2021 
2023  pi_native_handle *nativeHandle) {
2024  return piextGetNativeHandle(kernel, nativeHandle);
2025 }
2026 
2027 // This API is called by Sycl RT to notify the end of the plugin lifetime.
2028 // Windows: dynamically loaded plugins might have been unloaded already
2029 // when this is called. Sycl RT holds onto the PI plugin so it can be
2030 // called safely. But this is not transitive. If the PI plugin in turn
2031 // dynamically loaded a different DLL, that may have been unloaded.
2032 // TODO: add a global variable lifetime management code here (see
2033 // pi_level_zero.cpp for reference) Currently this is just a NOOP.
2034 pi_result piTearDown(void *PluginParameter) {
2035  (void)PluginParameter;
2036  return PI_SUCCESS;
2037 }
2038 
2039 pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
2040  uint64_t *HostTime) {
2041  OCLV::OpenCLVersion devVer, platVer;
2042  cl_platform_id platform;
2043  cl_device_id deviceID = cast<cl_device_id>(Device);
2044 
2045  // TODO: Cache OpenCL version for each device and platform
2046  auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
2047  sizeof(cl_platform_id), &platform, nullptr);
2048  if (ret_err != CL_SUCCESS) {
2049  return cast<pi_result>(ret_err);
2050  }
2051 
2052  ret_err = getDeviceVersion(deviceID, devVer);
2053 
2054  if (ret_err != CL_SUCCESS) {
2055  return cast<pi_result>(ret_err);
2056  }
2057 
2058  ret_err = getPlatformVersion(platform, platVer);
2059 
2060  if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) {
2062  "OpenCL version for device and/or platform is less than 2.1",
2063  PI_ERROR_INVALID_OPERATION);
2064  return PI_ERROR_INVALID_OPERATION;
2065  }
2066 
2067  if (DeviceTime) {
2068  uint64_t dummy;
2069  clGetDeviceAndHostTimer(deviceID, DeviceTime,
2070  HostTime == nullptr ? &dummy : HostTime);
2071 
2072  } else if (HostTime) {
2073  clGetHostTimer(deviceID, HostTime);
2074  }
2075 
2076  return PI_SUCCESS;
2077 }
2078 
2080 
2082  // Check that the major version matches in PiVersion and SupportedVersion
2084 
2085  // PI interface supports higher version or the same version.
2086  size_t PluginVersionSize = sizeof(PluginInit->PluginVersion);
2087  if (strlen(SupportedVersion) >= PluginVersionSize)
2088  return PI_ERROR_INVALID_VALUE;
2089  strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize);
2090 
2091 #define _PI_CL(pi_api, ocl_api) \
2092  (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
2093 
2094  // Platform
2096  _PI_CL(piPlatformGetInfo, clGetPlatformInfo)
2100  // Device
2103  _PI_CL(piDevicePartition, clCreateSubDevices)
2104  _PI_CL(piDeviceRetain, clRetainDevice)
2105  _PI_CL(piDeviceRelease, clReleaseDevice)
2110  // Context
2113  _PI_CL(piContextRetain, clRetainContext)
2114  _PI_CL(piContextRelease, clReleaseContext)
2117  // Queue
2121  _PI_CL(piQueueFinish, clFinish)
2122  _PI_CL(piQueueFlush, clFlush)
2123  _PI_CL(piQueueRetain, clRetainCommandQueue)
2124  _PI_CL(piQueueRelease, clReleaseCommandQueue)
2127  // Memory
2130  _PI_CL(piMemGetInfo, clGetMemObjectInfo)
2131  _PI_CL(piMemImageGetInfo, clGetImageInfo)
2132  _PI_CL(piMemRetain, clRetainMemObject)
2133  _PI_CL(piMemRelease, clReleaseMemObject)
2137  // Program
2141  _PI_CL(piProgramGetInfo, clGetProgramInfo)
2142  _PI_CL(piProgramCompile, clCompileProgram)
2143  _PI_CL(piProgramBuild, clBuildProgram)
2145  _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo)
2146  _PI_CL(piProgramRetain, clRetainProgram)
2147  _PI_CL(piProgramRelease, clReleaseProgram)
2152  // Kernel
2154  _PI_CL(piKernelSetArg, clSetKernelArg)
2155  _PI_CL(piKernelGetInfo, clGetKernelInfo)
2158  _PI_CL(piKernelRetain, clRetainKernel)
2159  _PI_CL(piKernelRelease, clReleaseKernel)
2164  // Event
2166  _PI_CL(piEventGetInfo, clGetEventInfo)
2167  _PI_CL(piEventGetProfilingInfo, clGetEventProfilingInfo)
2168  _PI_CL(piEventsWait, clWaitForEvents)
2169  _PI_CL(piEventSetCallback, clSetEventCallback)
2170  _PI_CL(piEventSetStatus, clSetUserEventStatus)
2171  _PI_CL(piEventRetain, clRetainEvent)
2172  _PI_CL(piEventRelease, clReleaseEvent)
2175  // Sampler
2177  _PI_CL(piSamplerGetInfo, clGetSamplerInfo)
2178  _PI_CL(piSamplerRetain, clRetainSampler)
2179  _PI_CL(piSamplerRelease, clReleaseSampler)
2180  // Queue commands
2181  _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel)
2182  _PI_CL(piEnqueueNativeKernel, clEnqueueNativeKernel)
2183  _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList)
2184  _PI_CL(piEnqueueEventsWaitWithBarrier, clEnqueueBarrierWithWaitList)
2185  _PI_CL(piEnqueueMemBufferRead, clEnqueueReadBuffer)
2186  _PI_CL(piEnqueueMemBufferReadRect, clEnqueueReadBufferRect)
2187  _PI_CL(piEnqueueMemBufferWrite, clEnqueueWriteBuffer)
2188  _PI_CL(piEnqueueMemBufferWriteRect, clEnqueueWriteBufferRect)
2189  _PI_CL(piEnqueueMemBufferCopy, clEnqueueCopyBuffer)
2190  _PI_CL(piEnqueueMemBufferCopyRect, clEnqueueCopyBufferRect)
2191  _PI_CL(piEnqueueMemBufferFill, clEnqueueFillBuffer)
2192  _PI_CL(piEnqueueMemImageRead, clEnqueueReadImage)
2193  _PI_CL(piEnqueueMemImageWrite, clEnqueueWriteImage)
2194  _PI_CL(piEnqueueMemImageCopy, clEnqueueCopyImage)
2195  _PI_CL(piEnqueueMemImageFill, clEnqueueFillImage)
2197  _PI_CL(piEnqueueMemUnmap, clEnqueueUnmapMemObject)
2198  // USM
2211  // Device global variable
2216 
2222 
2223 #undef _PI_CL
2224 
2225  return PI_SUCCESS;
2226 }
2227 
2228 #ifdef _WIN32
2229 #define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll"
2230 #include "../common_win_pi_trace/common_win_pi_trace.hpp"
2231 #undef __SYCL_PLUGIN_DLL_NAME
2232 #endif
2233 
2234 } // end extern 'C'
piclProgramCreateWithSource
pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *ret_program)
Definition: pi_opencl.cpp:1202
piContextCreate
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data1), void *user_data, pi_context *retcontext)
Definition: pi_opencl.cpp:1081
piEventGetProfilingInfo
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1440
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:224
piEventRelease
pi_result piEventRelease(pi_event event)
Definition: pi_esimd_emulator.cpp:1488
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1813
piextPlatformGetNativeHandle
pi_result piextPlatformGetNativeHandle(pi_platform platform, pi_native_handle *nativeHandle)
Gets the native handle of a PI platform object.
Definition: pi_opencl.cpp:1993
sycl::_V1::opencl::cl_int
std::int32_t cl_int
Definition: aliases.hpp:136
piMemGetInfo
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1096
piextProgramGetNativeHandle
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_opencl.cpp:2017
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:354
sycl::_V1::opencl::cl_uint
std::uint32_t cl_uint
Definition: aliases.hpp:137
clGetMemAllocInfoName
CONSTFIX char clGetMemAllocInfoName[]
Definition: pi_opencl.cpp:65
_pi_context_info
_pi_context_info
Definition: pi.h:347
pi.h
piKernelSetArg
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Definition: pi_esimd_emulator.cpp:1370
piPluginInit
pi_result piPluginInit(pi_plugin *PluginInit)
Definition: pi_opencl.cpp:2081
piextEnqueueDeviceGlobalVariableRead
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
Definition: pi_opencl.cpp:1906
piextProgramCreateWithNativeHandle
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, bool, pi_program *piProgram)
Creates PI program object from a native handle.
Definition: pi_opencl.cpp:927
PI_DEVICE_INFO_GPU_EU_COUNT
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:299
piMemBufferCreate
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
Definition: pi_opencl.cpp:1140
pi_bool
pi_uint32 pi_bool
Definition: pi.h:131
piPlatformsGet
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
Definition: pi_opencl.cpp:627
CHECK_ERR_SET_NULL_RET
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)
Definition: pi_opencl.cpp:34
piKernelGetGroupInfo
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:1257
PI_QUEUE_FLAG_ON_DEVICE
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE
Definition: pi.h:626
PI_MEMORY_ORDER_ACQUIRE
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE
Definition: pi.h:562
piextGetDeviceFunctionPointer
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret)
Retrieves a device function pointer to a user-defined function.
Definition: pi_opencl.cpp:1017
clCreateBufferWithPropertiesName
CONSTFIX char clCreateBufferWithPropertiesName[]
Definition: pi_opencl.cpp:60
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
Definition: pi.h:360
PI_DEVICE_INFO_IMAGE_SRGB
@ PI_DEVICE_INFO_IMAGE_SRGB
Definition: pi.h:305
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1353
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_opencl.cpp:1957
_pi_plugin
Definition: pi.h:1992
sycl::_V1::opencl::cl_ulong
std::uint64_t cl_ulong
Definition: aliases.hpp:139
piDevicePartition
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Definition: pi_esimd_emulator.cpp:822
is_in_separated_string
static bool is_in_separated_string(const std::string &str, char delimiter, const std::string &sub_str)
Definition: pi_opencl.cpp:992
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:465
PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
Definition: pi.h:214
piEnqueueMemBufferCopy
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1621
clSharedMemAllocName
CONSTFIX char clSharedMemAllocName[]
Definition: pi_opencl.cpp:58
_pi_plugin::PluginVersion
char PluginVersion[20]
Definition: pi.h:2002
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
Definition: pi.h:321
piDevicesGet
pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
Definition: pi_opencl.cpp:650
_pi_result
_pi_result
Definition: pi.h:140
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
piTearDown
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_opencl.cpp:2034
pi_context_properties
intptr_t pi_context_properties
Definition: pi.h:546
piEnqueueMemUnmap
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1684
PI_SAMPLER_INFO_FILTER_MODE
@ PI_SAMPLER_INFO_FILTER_MODE
Definition: pi.h:527
sycl::_V1::ext::oneapi::experimental::alignment
constexpr alignment_key::value_t< K > alignment
Definition: properties.hpp:349
piextKernelGetNativeHandle
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_opencl.cpp:2022
piSamplerGetInfo
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1522
piEventCreate
pi_result piEventCreate(pi_context context, pi_event *ret_event)
Create PI event object in a signalled/completed state.
Definition: pi_opencl.cpp:1325
piextUSMEnqueueMemset
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_opencl.cpp:1553
piPlatformGetInfo
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:444
PI_DEVICE_INFO_GPU_SLICES
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:301
OCLV::OpenCLVersion::isValid
bool isValid() const
Definition: pi_opencl.hpp:90
piextUSMDeviceAlloc
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_opencl.cpp:1418
piDeviceRetain
pi_result piDeviceRetain(pi_device device)
Definition: pi_esimd_emulator.cpp:572
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
_pi_device_type
_pi_device_type
Definition: pi.h:187
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:986
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_opencl.cpp:1214
CONSTFIX
#define CONSTFIX
Definition: pi_opencl.cpp:52
piMemBufferPartition
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_opencl.cpp:1180
PI_MEMORY_SCOPE_WORK_ITEM
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM
Definition: pi.h:568
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:303
piextUSMEnqueueMemAdvise
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_opencl.cpp:1681
piextMemCreateWithNativeHandle
pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *piMem)
Creates PI mem object from a native handle.
Definition: pi_opencl.cpp:1192
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:355
piextEnqueueDeviceGlobalVariableWrite
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API for writing data from host to a device global variable.
Definition: pi_opencl.cpp:1868
piGetDeviceAndHostTimer
pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime)
Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the h...
Definition: pi_opencl.cpp:2039
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1377
piextUSMHostAlloc
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_opencl.cpp:1382
piextEventCreateWithNativeHandle
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *piEvent)
Creates PI event object from a native handle.
Definition: pi_opencl.cpp:1338
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
_pi_platform
A PI platform stores all known PI devices, in the CUDA plugin this is just a vector of available devi...
Definition: pi_cuda.hpp:74
piEnqueueMemImageFill
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1801
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
@ PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
Definition: pi.h:286
OCLV::V2_1
const OpenCLVersion V2_1(2, 1)
_pi_sampler_addressing_mode
_pi_sampler_addressing_mode
Definition: pi.h:533
piKernelRelease
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1400
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:793
getDeviceVersion
static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version)
Definition: pi_opencl.cpp:118
cast
To cast(From value)
Definition: pi_opencl.cpp:42
_pi_device_info
_pi_device_info
Definition: pi.h:209
SupportedVersion
const char SupportedVersion[]
Definition: pi_opencl.cpp:2079
MaxMessageSize
constexpr size_t MaxMessageSize
Definition: pi_opencl.cpp:78
piEventSetStatus
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
Definition: pi_esimd_emulator.cpp:1476
piextContextGetNativeHandle
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
Definition: pi_opencl.cpp:2003
sycl::_V1::ext::intel::host_ptr
multi_ptr< ElementType, access::address_space::ext_intel_global_host_space, IsDecorated > host_ptr
Definition: usm_pointers.hpp:32
PI_SAMPLER_FILTER_MODE_NEAREST
@ PI_SAMPLER_FILTER_MODE_NEAREST
Definition: pi.h:542
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:816
piQueueCreate
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Definition: pi_opencl.cpp:759
piextContextCreateWithNativeHandle
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
Creates PI context object from a native handle.
Definition: pi_opencl.cpp:1096
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1325
clEnqueueReadGlobalVariableName
CONSTFIX char clEnqueueReadGlobalVariableName[]
Definition: pi_opencl.cpp:72
_pi_plugin::PiVersion
char PiVersion[20]
Definition: pi.h:2000
piextDeviceCreateWithNativeHandle
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform, pi_device *piDevice)
Creates PI device object from a native handle.
Definition: pi_opencl.cpp:738
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:361
_pi_sampler_filter_mode
_pi_sampler_filter_mode
Definition: pi.h:541
pi_opencl.hpp
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:320
_pi_queue_info
_pi_queue_info
Definition: pi.h:364
_pi_buffer_create_type
_pi_buffer_create_type
Definition: pi.h:517
clMemBlockingFreeName
CONSTFIX char clMemBlockingFreeName[]
Definition: pi_opencl.cpp:59
clGetDeviceFunctionPointerName
CONSTFIX char clGetDeviceFunctionPointerName[]
Definition: pi_opencl.cpp:68
piEnqueueMemBufferMap
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_opencl.cpp:1354
piextPlatformCreateWithNativeHandle
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform *platform)
Creates PI platform object from a native handle.
Definition: pi_opencl.cpp:642
PI_MEMORY_ORDER_RELAXED
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED
Definition: pi.h:561
piextUSMEnqueuePrefetch
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_opencl.cpp:1631
piEventsWait
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_esimd_emulator.cpp:1453
PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS
@ PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS
Definition: pi.h:324
piContextRelease
pi_result piContextRelease(pi_context context)
Definition: pi_esimd_emulator.cpp:897
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:395
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1355
checkDeviceExtensions
static cl_int checkDeviceExtensions(cl_device_id dev, const std::vector< std::string > &exts, bool &supported)
Definition: pi_opencl.cpp:138
pi_uint32
uint32_t pi_uint32
Definition: pi.h:129
OCLV::V2_0
const OpenCLVersion V2_0(2, 0)
piEnqueueMemBufferReadRect
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1599
piMemRelease
pi_result piMemRelease(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1108
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:842
piEnqueueMemBufferCopyRect
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1627
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Definition: pi_opencl.cpp:1110
piKernelGetSubGroupInfo
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
Definition: pi_opencl.cpp:1277
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_opencl.cpp:265
clGetDeviceFunctionPointer_fn
CL_API_ENTRY cl_int(CL_API_CALL * clGetDeviceFunctionPointer_fn)(cl_device_id device, cl_program program, const char *FuncName, cl_ulong *ret_ptr)
Definition: pi_opencl.cpp:1014
cl.h
_pi_kernel_exec_info
_pi_kernel_exec_info
Definition: pi.h:1375
PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
Definition: pi.h:628
piEnqueueMemBufferWrite
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1607
PI_MEMORY_SCOPE_DEVICE
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE
Definition: pi.h:571
pi_mem_flags
pi_bitfield pi_mem_flags
Definition: pi.h:585
PI_MEMORY_SCOPE_SYSTEM
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM
Definition: pi.h:572
piextGetNativeHandle
static pi_result piextGetNativeHandle(void *piObj, pi_native_handle *nativeHandle)
Common API for getting the native handle of a PI object.
Definition: pi_opencl.cpp:1986
piEnqueueMemImageRead
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1724
clDeviceMemAllocName
CONSTFIX char clDeviceMemAllocName[]
Definition: pi_opencl.cpp:57
ErrorMessageCode
thread_local pi_result ErrorMessageCode
Definition: pi_opencl.cpp:79
clEnqueueReadGlobalVariable_fn
CL_API_ENTRY cl_int(CL_API_CALL * clEnqueueReadGlobalVariable_fn)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
Definition: pi_opencl.cpp:1851
PI_KERNEL_GROUP_INFO_NUM_REGS
@ PI_KERNEL_GROUP_INFO_NUM_REGS
Definition: pi.h:393
PI_MEMORY_SCOPE_SUB_GROUP
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP
Definition: pi.h:569
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:804
PI_KERNEL_MAX_SUB_GROUP_SIZE
@ PI_KERNEL_MAX_SUB_GROUP_SIZE
Definition: pi.h:407
ErrorMessage
thread_local char ErrorMessage[MaxMessageSize]
Definition: pi_opencl.cpp:80
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:978
PI_MEMORY_ORDER_SEQ_CST
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST
Definition: pi.h:565
piEnqueueEventsWaitWithBarrier
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1536
getExtFuncFromContext
static pi_result getExtFuncFromContext(pi_context context, T *fptr)
Definition: pi_opencl.cpp:163
piKernelRetain
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_esimd_emulator.cpp:1398
piEventSetCallback
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
PI_MEMORY_ORDER_RELEASE
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE
Definition: pi.h:563
piEnqueueMemBufferFill
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1635
_PI_OPENCL_PLUGIN_VERSION_STRING
#define _PI_OPENCL_PLUGIN_VERSION_STRING
Definition: pi_opencl.hpp:28
clEnqueueWriteGlobalVariable_fn
CL_API_ENTRY cl_int(CL_API_CALL * clEnqueueWriteGlobalVariable_fn)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
Definition: pi_opencl.cpp:1847
piSamplerRetain
pi_result piSamplerRetain(pi_sampler sampler)
Definition: pi_esimd_emulator.cpp:1527
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
Definition: pi.h:357
piMemImageGetInfo
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1720
piextUSMEnqueueMemcpy2D
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
Definition: pi_opencl.cpp:1793
piextEventGetNativeHandle
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_esimd_emulator.cpp:1509
OCLV::OpenCLVersion
Definition: pi_opencl.hpp:32
pi_uint64
uint64_t pi_uint64
Definition: pi.h:130
pi_memory_scope_capabilities
pi_bitfield pi_memory_scope_capabilities
Definition: pi.h:567
_pi_device_binary_property_struct
Definition: pi.h:741
pi_mem_properties
pi_bitfield pi_mem_properties
Definition: pi.h:601
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:760
_pi_sampler
Implementation of samplers for CUDA.
Definition: pi_cuda.hpp:987
PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
Definition: pi.h:373
piextUSMEnqueueMemset2D
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
Definition: pi_opencl.cpp:1763
PI_QUEUE_FLAG_ON_DEVICE_DEFAULT
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT
Definition: pi.h:627
sycl::_V1::info::device_type
device_type
Definition: info_desc.hpp:44
piDeviceRelease
pi_result piDeviceRelease(pi_device device)
Definition: pi_esimd_emulator.cpp:582
PI_DEVICE_INFO_UUID
@ PI_DEVICE_INFO_UUID
Definition: pi.h:295
piextUSMSharedAlloc
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_opencl.cpp:1456
PI_DEVICE_INFO_PCI_ADDRESS
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:298
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_opencl.cpp:667
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:133
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_opencl.cpp:1941
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:1000
piContextRetain
pi_result piContextRetain(pi_context context)
Definition: pi_esimd_emulator.cpp:887
piPluginGetLastError
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
Definition: pi_opencl.cpp:91
pi_sampler_properties
pi_bitfield pi_sampler_properties
Definition: pi.h:554
clEnqueueMemsetName
CONSTFIX char clEnqueueMemsetName[]
Definition: pi_opencl.cpp:63
clSetProgramSpecializationConstant_fn
CL_API_ENTRY cl_int(CL_API_CALL * clSetProgramSpecializationConstant_fn)(cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value)
Definition: pi_opencl.cpp:1953
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_opencl.cpp:837
_pi_image_format
Definition: pi.h:975
clEnqueueWriteGlobalVariableName
CONSTFIX char clEnqueueWriteGlobalVariableName[]
Definition: pi_opencl.cpp:70
clHostMemAllocName
CONSTFIX char clHostMemAllocName[]
Definition: pi_opencl.cpp:56
clSetKernelArgMemPointerName
CONSTFIX char clSetKernelArgMemPointerName[]
Definition: pi_opencl.cpp:62
PI_MEMORY_SCOPE_WORK_GROUP
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP
Definition: pi.h:570
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:328
iostream_proxy.hpp
PI_PROGRAM_INFO_KERNEL_NAMES
@ PI_PROGRAM_INFO_KERNEL_NAMES
Definition: pi.h:344
setErrorMessage
static void setErrorMessage(const char *message, pi_result error_code)
Definition: pi_opencl.cpp:83
piextUSMEnqueueFill2D
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Fill API.
Definition: pi_opencl.cpp:1732
USMSetIndirectAccess
static pi_result USMSetIndirectAccess(pi_kernel kernel)
Enables indirect access of pointers in kernels.
Definition: pi_opencl.cpp:223
_PI_PLUGIN_VERSION_CHECK
#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION)
Definition: pi.h:107
piEnqueueMemBufferRead
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1541
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:307
PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
@ PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
Definition: pi.h:213
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1383
piEventGetInfo
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1404
piEnqueueEventsWait
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1531
piMemRetain
pi_result piMemRetain(pi_mem mem)
Definition: pi_esimd_emulator.cpp:1100
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
Definition: pi.h:304
PI_MEMORY_ORDER_ACQ_REL
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL
Definition: pi.h:564
piextUSMGetMemAllocInfo
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_opencl.cpp:1828
pi_queue_properties
pi_bitfield pi_queue_properties
Definition: pi.h:620
PI_DEVICE_INFO_ATOMIC_64
@ PI_DEVICE_INFO_ATOMIC_64
Definition: pi.h:318
piEventRetain
pi_result piEventRetain(pi_event event)
Definition: pi_esimd_emulator.cpp:1478
_pi_image_desc
Definition: pi.h:980
piextKernelSetArgMemObj
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem *arg_value)
Definition: pi_opencl.cpp:966
_pi_event
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:632
piextMemGetNativeHandle
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_opencl.cpp:2013
PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
Definition: pi.h:302
getPlatformVersion
static cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version)
Definition: pi_opencl.cpp:96
piextDeviceGetNativeHandle
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
Definition: pi_opencl.cpp:1998
piEnqueueMemBufferWriteRect
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1613
piSamplerCreate
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
Definition: pi_opencl.cpp:935
PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
@ PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
Definition: pi.h:317
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1348
piextKernelCreateWithNativeHandle
pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_program, bool, pi_kernel *piKernel)
Creates PI kernel object from a native handle.
Definition: pi_opencl.cpp:980
sycl::_V1::opencl::cl_bool
bool cl_bool
Definition: aliases.hpp:131
piMemImageCreate
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_opencl.cpp:1166
piQueueFlush
pi_result piQueueFlush(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:1007
_pi_usm_migration_flags
_pi_usm_migration_flags
Definition: pi.h:1712
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
Definition: pi.h:788
PI_QUEUE_FLAG_PROFILING_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
Definition: pi.h:625
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:624
piEnqueueNativeKernel
pi_result piEnqueueNativeKernel(pi_queue queue, void(*user_func)(void *), void *args, size_t cb_args, pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1883
piextUSMEnqueueMemcpy
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_opencl.cpp:1592
clSetProgramSpecializationConstantName
CONSTFIX char clSetProgramSpecializationConstantName[]
Definition: pi_opencl.cpp:66
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:792
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
Definition: pi.h:331
piextQueueCreateWithNativeHandle
pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_device, bool ownNativeHandle, pi_queue *piQueue)
Creates PI queue object from a native handle.
Definition: pi_opencl.cpp:826
_PI_CL
#define _PI_CL(pi_api, ocl_api)
piEnqueueMemImageCopy
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1795
_pi_kernel_group_info
_pi_kernel_group_info
Definition: pi.h:385
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:1230
pfn_notify
void(* pfn_notify)(pi_event event, pi_int32 eventCommandStatus, void *userData)
Definition: pi_cuda.hpp:628
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:319
OCLV::V3_0
const OpenCLVersion V3_0(3, 0)
piextKernelSetArgSampler
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
Definition: pi_opencl.cpp:973
piextKernelSetArgPointer
pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Sets up pointer arguments for CL kernels.
Definition: pi_opencl.cpp:1512
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
Definition: pi.h:356
piSamplerRelease
pi_result piSamplerRelease(pi_sampler sampler)
Definition: pi_esimd_emulator.cpp:1529
pi_memory_order_capabilities
pi_bitfield pi_memory_order_capabilities
Definition: pi.h:560
PI_SAMPLER_INFO_NORMALIZED_COORDS
@ PI_SAMPLER_INFO_NORMALIZED_COORDS
Definition: pi.h:525
_pi_mem_alloc_info
_pi_mem_alloc_info
Definition: pi.h:1695
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:519
piEnqueueMemImageWrite
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1789
pi_usm_mem_properties
pi_bitfield pi_usm_mem_properties
Definition: pi.h:607
pi_map_flags
pi_bitfield pi_map_flags
Definition: pi.h:595
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:791
PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
Definition: pi.h:300
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_opencl.cpp:1248
piextQueueGetNativeHandle
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle)
Gets the native handle of a PI queue object.
Definition: pi_opencl.cpp:2008
piextQueueCreate
pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue)
Definition: pi_opencl.cpp:745
clEnqueueMemcpyName
CONSTFIX char clEnqueueMemcpyName[]
Definition: pi_opencl.cpp:64
piextUSMFree
pi_result piextUSMFree(pi_context context, void *ptr)
Frees allocated USM memory in a blocking manner.
Definition: pi_opencl.cpp:1488
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
Definition: pi.h:359
pi_int32
int32_t pi_int32
Definition: pi.h:128
_pi_context
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:170
PI_QUEUE_FLAGS
constexpr pi_queue_properties PI_QUEUE_FLAGS
Definition: pi.h:621
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:83
_pi_kernel_sub_group_info
_pi_kernel_sub_group_info
Definition: pi.h:406
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
Definition: pi.h:332
PI_SAMPLER_INFO_ADDRESSING_MODE
@ PI_SAMPLER_INFO_ADDRESSING_MODE
Definition: pi.h:526
PI_SAMPLER_ADDRESSING_MODE_CLAMP
@ PI_SAMPLER_ADDRESSING_MODE_CLAMP
Definition: pi.h:537