DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_opencl.cpp
Go to the documentation of this file.
1 //==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
10 
16 
17 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
18 
19 #include <pi_opencl.hpp>
20 #include <sycl/detail/cl.h>
22 #include <sycl/detail/pi.h>
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <cstring>
27 #include <limits>
28 #include <map>
29 #include <memory>
30 #include <mutex>
31 #include <sstream>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35 
// Early-exit helper: when `err` is not CL_SUCCESS, stores nullptr through
// `ptr` (only if `ptr` itself is non-null) and returns `reterr` converted to
// pi_result from the enclosing function.
//
// The body is wrapped in do { } while (0) and the arguments are parenthesized
// so that the macro behaves like a single statement: it can be used as the
// body of an unbraced if/else and with arbitrary argument expressions.
// Every use must be terminated with a semicolon.
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)                               \
  do {                                                                         \
    if ((err) != CL_SUCCESS) {                                                 \
      if ((ptr) != nullptr)                                                    \
        *(ptr) = nullptr;                                                      \
      return cast<pi_result>(reterr);                                          \
    }                                                                          \
  } while (0)
42 
// All conversions between PI and OpenCL handle/value types go through this
// explicit helper; no implicit conversion operators are defined on purpose.
// The only compile-time guard is that source and destination have the same
// size.
template <class To, class From> To cast(From value) {
  // TODO: see if more sanity checks are possible.
  static_assert(sizeof(To) == sizeof(From), "cast failed size check");
  To converted = (To)(value);
  return converted;
}
49 
// Older versions of GCC (anything before 7.2) don't like "const" on the name
// arrays declared with CONSTFIX below, so those compilers get "constexpr"
// instead. The major-version test must use __GNUC__ in both places; a
// misspelled macro is silently treated as 0 by the preprocessor and would
// make the 7.0/7.1 case unreachable.
#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2))
#define CONSTFIX constexpr
#else
#define CONSTFIX const
#endif
56 
57 // Names of USM functions that are queried from OpenCL
58 CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
59 CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
60 CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
61 CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL";
63  "clCreateBufferWithPropertiesINTEL";
64 CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL";
65 CONSTFIX char clEnqueueMemFillName[] = "clEnqueueMemFillINTEL";
66 CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL";
67 CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL";
69  "clSetProgramSpecializationConstant";
71  "clGetDeviceFunctionPointerINTEL";
73  "clEnqueueWriteGlobalVariableINTEL";
75  "clEnqueueReadGlobalVariableINTEL";
76 // Names of host pipe functions queried from OpenCL
77 CONSTFIX char clEnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL";
78 CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL";
79 
80 #undef CONSTFIX
81 
82 // Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
83 constexpr size_t MaxMessageSize = 256;
84 thread_local pi_result ErrorMessageCode = PI_SUCCESS;
85 thread_local char ErrorMessage[MaxMessageSize];
86 
87 // Utility function for setting a message and warning
88 [[maybe_unused]] static void setErrorMessage(const char *message,
89  pi_result error_code) {
90  assert(strlen(message) <= MaxMessageSize);
91  strcpy(ErrorMessage, message);
92  ErrorMessageCode = error_code;
93 }
94 
95 // Returns plugin specific error and warning messages
97  *message = &ErrorMessage[0];
98  return ErrorMessageCode;
99 }
100 
101 // Returns plugin specific backend option.
102 pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option,
103  const char **backend_option) {
104  using namespace std::literals;
105  if (frontend_option == nullptr)
106  return PI_ERROR_INVALID_VALUE;
107  if (frontend_option == ""sv) {
108  *backend_option = "";
109  return PI_SUCCESS;
110  }
111  // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others.
112  if (!strcmp(frontend_option, "-O0")) {
113  *backend_option = "-cl-opt-disable";
114  return PI_SUCCESS;
115  }
116  if (frontend_option == "-O1"sv || frontend_option == "-O2"sv ||
117  frontend_option == "-O3"sv) {
118  *backend_option = "";
119  return PI_SUCCESS;
120  }
121  if (frontend_option == "-ftarget-compile-fast"sv) {
122  *backend_option = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
123  return PI_SUCCESS;
124  }
125  return PI_ERROR_INVALID_VALUE;
126 }
127 
128 static cl_int getPlatformVersion(cl_platform_id plat,
129  OCLV::OpenCLVersion &version) {
130  cl_int ret_err = CL_INVALID_VALUE;
131 
132  size_t platVerSize = 0;
133  ret_err =
134  clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize);
135 
136  std::string platVer(platVerSize, '\0');
137  ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize,
138  platVer.data(), nullptr);
139 
140  if (ret_err != CL_SUCCESS)
141  return ret_err;
142 
143  version = OCLV::OpenCLVersion(platVer);
144  if (!version.isValid())
145  return CL_INVALID_PLATFORM;
146 
147  return ret_err;
148 }
149 
150 static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) {
151  cl_int ret_err = CL_INVALID_VALUE;
152 
153  size_t devVerSize = 0;
154  ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize);
155 
156  std::string devVer(devVerSize, '\0');
157  ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(),
158  nullptr);
159 
160  if (ret_err != CL_SUCCESS)
161  return ret_err;
162 
163  version = OCLV::OpenCLVersion(devVer);
164  if (!version.isValid())
165  return CL_INVALID_DEVICE;
166 
167  return ret_err;
168 }
169 
170 static cl_int checkDeviceExtensions(cl_device_id dev,
171  const std::vector<std::string> &exts,
172  bool &supported) {
173  cl_int ret_err = CL_INVALID_VALUE;
174 
175  size_t extSize = 0;
176  ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize);
177 
178  std::string extStr(extSize, '\0');
179  ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(),
180  nullptr);
181 
182  if (ret_err != CL_SUCCESS)
183  return ret_err;
184 
185  supported = true;
186  for (const std::string &ext : exts)
187  if (!(supported = (extStr.find(ext) != std::string::npos)))
188  break;
189 
190  return ret_err;
191 }
192 
193 using clGetDeviceFunctionPointer_fn = CL_API_ENTRY
194 cl_int(CL_API_CALL *)(cl_device_id device, cl_program program,
195  const char *FuncName, cl_ulong *ret_ptr);
196 
197 using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY
198 cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool,
199  size_t, size_t, const void *, cl_uint, const cl_event *,
200  cl_event *);
201 
202 using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY
203 cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool,
204  size_t, size_t, void *, cl_uint, const cl_event *,
205  cl_event *);
206 
207 using clSetProgramSpecializationConstant_fn = CL_API_ENTRY
208 cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size,
209  const void *spec_value);
210 
211 template <typename T> struct FuncPtrCache {
212  std::map<cl_context, T> Map;
213  std::mutex Mutex;
214 };
215 
// FIXME: There's currently no mechanism for cleaning up this cache, meaning
// that it is invalidated whenever a context is destroyed. This could lead to
// reusing an invalid function pointer if another context happens to have the
// same native handle.
240 };
241 // A raw pointer is used here since the lifetime of this map has to be tied to
242 // piTeardown to avoid issues with static destruction order (a user application
243 // might have static objects that indirectly access this cache in their
244 // destructor).
246 
247 // USM helper function to get an extension function pointer
248 template <typename T>
249 static pi_result getExtFuncFromContext(cl_context context,
250  FuncPtrCache<T> &FPtrCache,
251  const char *FuncName, T *fptr) {
252  // TODO
253  // Potentially redo caching as PI interface changes.
254  // if cached, return cached FuncPtr
255  std::lock_guard<std::mutex> CacheLock{FPtrCache.Mutex};
256  std::map<cl_context, T> &FPtrMap = FPtrCache.Map;
257  auto It = FPtrMap.find(context);
258  if (It != FPtrMap.end()) {
259  auto F = It->second;
260  // if cached that extension is not available return nullptr and
261  // PI_ERROR_INVALID_VALUE
262  *fptr = F;
263  return F ? PI_SUCCESS : PI_ERROR_INVALID_VALUE;
264  }
265 
266  cl_uint deviceCount;
267  cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
268  sizeof(cl_uint), &deviceCount, nullptr);
269 
270  if (ret_err != CL_SUCCESS || deviceCount < 1) {
271  return PI_ERROR_INVALID_CONTEXT;
272  }
273 
274  std::vector<cl_device_id> devicesInCtx(deviceCount);
275  ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES,
276  deviceCount * sizeof(cl_device_id),
277  devicesInCtx.data(), nullptr);
278 
279  if (ret_err != CL_SUCCESS) {
280  return PI_ERROR_INVALID_CONTEXT;
281  }
282 
283  cl_platform_id curPlatform;
284  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
285  sizeof(cl_platform_id), &curPlatform, nullptr);
286 
287  if (ret_err != CL_SUCCESS) {
288  return PI_ERROR_INVALID_CONTEXT;
289  }
290 
291  T FuncPtr =
292  (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
293 
294  if (!FuncPtr) {
295  // Cache that the extension is not available
296  FPtrMap[context] = nullptr;
297  return PI_ERROR_INVALID_VALUE;
298  }
299 
300  *fptr = FuncPtr;
301  FPtrMap[context] = FuncPtr;
302 
303  return cast<pi_result>(ret_err);
304 }
305 
311  // We test that each alloc type is supported before we actually try to
312  // set KernelExecInfo.
313  cl_bool TrueVal = CL_TRUE;
314  clHostMemAllocINTEL_fn HFunc = nullptr;
315  clSharedMemAllocINTEL_fn SFunc = nullptr;
316  clDeviceMemAllocINTEL_fn DFunc = nullptr;
317  cl_context CLContext;
318  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
319  sizeof(cl_context), &CLContext, nullptr);
320  if (CLErr != CL_SUCCESS) {
321  return cast<pi_result>(CLErr);
322  }
323 
324  getExtFuncFromContext<clHostMemAllocINTEL_fn>(
326  &HFunc);
327  if (HFunc) {
328  clSetKernelExecInfo(cast<cl_kernel>(kernel),
329  CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
330  sizeof(cl_bool), &TrueVal);
331  }
332 
333  getExtFuncFromContext<clDeviceMemAllocINTEL_fn>(
335  clDeviceMemAllocName, &DFunc);
336  if (DFunc) {
337  clSetKernelExecInfo(cast<cl_kernel>(kernel),
338  CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
339  sizeof(cl_bool), &TrueVal);
340  }
341 
342  getExtFuncFromContext<clSharedMemAllocINTEL_fn>(
344  clSharedMemAllocName, &SFunc);
345  if (SFunc) {
346  clSetKernelExecInfo(cast<cl_kernel>(kernel),
347  CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
348  sizeof(cl_bool), &TrueVal);
349  }
350  return PI_SUCCESS;
351 }
352 
353 extern "C" {
354 
356  size_t paramValueSize, void *paramValue,
357  size_t *paramValueSizeRet) {
358  switch (paramName) {
  // TODO: Check regularly to see if support is enabled in OpenCL.
360  // Intel GPU EU device-specific information extensions.
361  // Some of the queries are enabled by cl_intel_device_attribute_query
362  // extension, but it's not yet in the Registry.
371  // TODO: Check if device UUID extension is enabled in OpenCL.
372  // For details about Intel UUID extension, see
373  // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
374  case PI_DEVICE_INFO_UUID:
375  return PI_ERROR_INVALID_VALUE;
377  // This query is missing before OpenCL 3.0
378  // Check version and handle appropriately
379  OCLV::OpenCLVersion devVer;
380  cl_device_id deviceID = cast<cl_device_id>(device);
381  cl_int ret_err = getDeviceVersion(deviceID, devVer);
382  if (ret_err != CL_SUCCESS) {
383  return cast<pi_result>(ret_err);
384  }
385 
386  // Minimum required capability to be returned
387  // For OpenCL 1.2, this is all that is required
389 
390  if (devVer >= OCLV::V3_0) {
391  // For OpenCL >=3.0, the query should be implemented
392  cl_device_atomic_capabilities cl_capabilities = 0;
393  cl_int ret_err = clGetDeviceInfo(
394  deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
395  sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr);
396  if (ret_err != CL_SUCCESS)
397  return cast<pi_result>(ret_err);
398 
399  // Mask operation to only consider atomic_memory_order* capabilities
400  cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
401  CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
402  CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
403  cl_capabilities &= mask;
404 
      // The memory order capabilities are hierarchical, if one is implied, all
      // preceding capabilities are implied as well. Especially in the case of
      // ACQ_REL.
408  if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
409  capabilities |= PI_MEMORY_ORDER_SEQ_CST;
410  }
411  if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
414  }
415  } else if (devVer >= OCLV::V2_0) {
416  // For OpenCL 2.x, return all capabilities
417  // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model)
420  }
421 
422  if (paramValue) {
423  if (paramValueSize < sizeof(pi_memory_order_capabilities))
424  return static_cast<pi_result>(CL_INVALID_VALUE);
425 
426  std::memcpy(paramValue, &capabilities, sizeof(capabilities));
427  }
428 
429  if (paramValueSizeRet)
430  *paramValueSizeRet = sizeof(capabilities);
431 
432  return static_cast<pi_result>(CL_SUCCESS);
433  }
435  // Initialize result to minimum mandated capabilities according to
436  // SYCL2020 4.6.3.2
437  // Because scopes are hierarchical, wider scopes support all narrower
438  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
439  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
443 
444  OCLV::OpenCLVersion devVer;
445 
446  cl_device_id deviceID = cast<cl_device_id>(device);
447  cl_int ret_err = getDeviceVersion(deviceID, devVer);
448  if (ret_err != CL_SUCCESS)
449  return static_cast<pi_result>(ret_err);
450 
451  cl_device_atomic_capabilities devCapabilities = 0;
452  if (devVer >= OCLV::V3_0) {
453  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
454  sizeof(cl_device_atomic_capabilities),
455  &devCapabilities, nullptr);
456  if (ret_err != CL_SUCCESS)
457  return static_cast<pi_result>(ret_err);
458  assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
459  "Violates minimum mandated guarantee");
460 
461  // Because scopes are hierarchical, wider scopes support all narrower
462  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
463  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
464  // We already initialized to these minimum mandated capabilities. Just
465  // check wider scopes.
466  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
467  result |= PI_MEMORY_SCOPE_DEVICE;
468  }
469 
470  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
471  result |= PI_MEMORY_SCOPE_SYSTEM;
472  }
473 
474  } else {
475  // This info is only available in OpenCL version >= 3.0
476  // Just return minimum mandated capabilities for older versions.
477  // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we
478  // already initialized using it.
479  if (devVer >= OCLV::V2_0) {
480  // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
481  // ALL_DEVICES
483  }
484  }
485  if (paramValue) {
486  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
487  return PI_ERROR_INVALID_VALUE;
488 
489  std::memcpy(paramValue, &result, sizeof(result));
490  }
491  if (paramValueSizeRet)
492  *paramValueSizeRet = sizeof(result);
493  return PI_SUCCESS;
494  }
496  // Initialize result to minimum mandated capabilities according to
497  // SYCL2020 4.6.3.2
501 
502  OCLV::OpenCLVersion devVer;
503 
504  cl_device_id deviceID = cast<cl_device_id>(device);
505  cl_int ret_err = getDeviceVersion(deviceID, devVer);
506  if (ret_err != CL_SUCCESS)
507  return static_cast<pi_result>(ret_err);
508 
509  cl_device_atomic_capabilities devCapabilities = 0;
510  if (devVer >= OCLV::V3_0) {
511  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
512  sizeof(cl_device_atomic_capabilities),
513  &devCapabilities, nullptr);
514  if (ret_err != CL_SUCCESS)
515  return static_cast<pi_result>(ret_err);
516  assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) &&
517  "Violates minimum mandated guarantee");
518  assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) &&
519  "Violates minimum mandated guarantee");
520 
521  // We already initialized to minimum mandated capabilities. Just
522  // check stronger orders.
523  if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
524  result |= PI_MEMORY_ORDER_SEQ_CST;
525  }
526 
527  } else {
528  // This info is only available in OpenCL version >= 3.0
529  // Just return minimum mandated capabilities for older versions.
530  // OpenCL 1.x minimum mandated capabilities are RELAXED | ACQ_REL, we
531  // already initialized using these.
532  if (devVer >= OCLV::V2_0) {
533  // OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL |
534  // SEQ_CST
535  result |= PI_MEMORY_ORDER_SEQ_CST;
536  }
537  }
538  if (paramValue) {
539  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
540  return PI_ERROR_INVALID_VALUE;
541 
542  std::memcpy(paramValue, &result, sizeof(result));
543  }
544  if (paramValueSizeRet)
545  *paramValueSizeRet = sizeof(result);
546  return PI_SUCCESS;
547  }
549  // Initialize result to minimum mandated capabilities according to
550  // SYCL2020 4.6.3.2.
551  // Because scopes are hierarchical, wider scopes support all narrower
552  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
553  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
557 
558  OCLV::OpenCLVersion devVer;
559 
560  cl_device_id deviceID = cast<cl_device_id>(device);
561  cl_int ret_err = getDeviceVersion(deviceID, devVer);
562  if (ret_err != CL_SUCCESS)
563  return static_cast<pi_result>(ret_err);
564 
565  cl_device_atomic_capabilities devCapabilities = 0;
566  if (devVer >= OCLV::V3_0) {
567  ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
568  sizeof(cl_device_atomic_capabilities),
569  &devCapabilities, nullptr);
570  if (ret_err != CL_SUCCESS)
571  return static_cast<pi_result>(ret_err);
572  assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
573  "Violates minimum mandated guarantee");
574 
575  // Because scopes are hierarchical, wider scopes support all narrower
576  // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
577  // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
578  // We already initialized to these minimum mandated capabilities. Just
579  // check wider scopes.
580  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
581  result |= PI_MEMORY_SCOPE_DEVICE;
582  }
583 
584  if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
585  result |= PI_MEMORY_SCOPE_SYSTEM;
586  }
587 
588  } else {
589  // This info is only available in OpenCL version >= 3.0
590  // Just return minimum mandated capabilities for older versions.
591  // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we
592  // already initialized using it.
593  if (devVer >= OCLV::V2_0) {
594  // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
595  // ALL_DEVICES
597  }
598  }
599  if (paramValue) {
600  if (paramValueSize < sizeof(cl_device_atomic_capabilities))
601  return PI_ERROR_INVALID_VALUE;
602 
603  std::memcpy(paramValue, &result, sizeof(result));
604  }
605  if (paramValueSizeRet)
606  *paramValueSizeRet = sizeof(result);
607  return PI_SUCCESS;
608  }
610  cl_int ret_err = CL_SUCCESS;
611  cl_bool result = CL_FALSE;
612  bool supported = false;
613 
614  ret_err = checkDeviceExtensions(
615  cast<cl_device_id>(device),
616  {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"},
617  supported);
618  if (ret_err != CL_SUCCESS)
619  return static_cast<pi_result>(ret_err);
620 
621  result = supported;
622  std::memcpy(paramValue, &result, sizeof(cl_bool));
623  return PI_SUCCESS;
624  }
626  // bfloat16 math functions are not yet supported on Intel GPUs.
627  bool result = false;
628  if (paramValueSize < sizeof(result))
629  return PI_ERROR_INVALID_VALUE;
630  std::memcpy(paramValue, &result, sizeof(result));
631  return PI_SUCCESS;
632  }
634  bool result = true;
635  if (paramValueSize < sizeof(result))
636  return PI_ERROR_INVALID_VALUE;
637  std::memcpy(paramValue, &result, sizeof(result));
638  return PI_SUCCESS;
639  }
641  cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
642  cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
643  sizeof(cl_device_type), &devType, nullptr);
644 
645  // FIXME: here we assume that program built for a root GPU device can be
646  // used on its sub-devices without re-building
647  bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
648  if (paramValueSize < sizeof(result))
649  return PI_ERROR_INVALID_VALUE;
650  std::memcpy(paramValue, &result, sizeof(result));
651  return PI_SUCCESS;
652  }
654  // Returns the maximum sizes of a work group for each dimension one
655  // could use to submit a kernel. There is no such query defined in OpenCL
656  // so we'll return the maximum value.
657  {
658  if (paramValueSizeRet)
659  *paramValueSizeRet = paramValueSize;
660  static constexpr size_t Max = (std::numeric_limits<size_t>::max)();
661  size_t *out = cast<size_t *>(paramValue);
662  if (paramValueSize >= sizeof(size_t))
663  out[0] = Max;
664  if (paramValueSize >= 2 * sizeof(size_t))
665  out[1] = Max;
666  if (paramValueSize >= 3 * sizeof(size_t))
667  out[2] = Max;
668  return PI_SUCCESS;
669  }
671  pi_int32 result = 1;
672  std::memcpy(paramValue, &result, sizeof(pi_int32));
673  return PI_SUCCESS;
674  }
676  // Corresponding OpenCL query is only available starting with OpenCL 2.1 and
677  // we have to emulate it on older OpenCL runtimes.
678  OCLV::OpenCLVersion version;
679  cl_int err = getDeviceVersion(cast<cl_device_id>(device), version);
680  if (err != CL_SUCCESS)
681  return static_cast<pi_result>(err);
682 
683  if (version >= OCLV::V2_1) {
684  err = clGetDeviceInfo(cast<cl_device_id>(device),
685  cast<cl_device_info>(paramName), paramValueSize,
686  paramValue, paramValueSizeRet);
687  if (err != CL_SUCCESS)
688  return static_cast<pi_result>(err);
689 
690  if (paramValue && *static_cast<cl_uint *>(paramValue) == 0u) {
691  // OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 spec
692  // says that minimum possible value is 1.
693  cl_uint value = 1u;
694  std::memcpy(paramValue, &value, sizeof(cl_uint));
695  }
696 
697  return static_cast<pi_result>(err);
698  }
699 
700  // Otherwise, we can't query anything, because even cl_khr_subgroups does
701  // not provide similar query. Therefore, simply return minimum possible
702  // value 1 here.
703  if (paramValue && paramValueSize < sizeof(cl_uint))
704  return static_cast<pi_result>(CL_INVALID_VALUE);
705  if (paramValueSizeRet)
706  *paramValueSizeRet = sizeof(cl_uint);
707 
708  if (paramValue) {
709  cl_uint value = 1u;
710  std::memcpy(paramValue, &value, sizeof(cl_uint));
711  }
712 
713  return static_cast<pi_result>(CL_SUCCESS);
714  }
    // TODO: return something meaningful for backend_version below
717  const char *value = "";
718  size_t valueSize = (strlen(value) + 1) * sizeof(char);
719  if (paramValue)
720  std::memcpy(paramValue, value, valueSize);
721  if (paramValueSizeRet != nullptr)
722  *paramValueSizeRet = valueSize;
723  return PI_SUCCESS;
724  }
726  cl_int ret_err = CL_SUCCESS;
727  bool result = false;
728  if (paramValueSize < sizeof(result))
729  return PI_ERROR_INVALID_VALUE;
730  bool supported = false;
731 
732  ret_err =
733  checkDeviceExtensions(cast<cl_device_id>(device),
734  {"cl_intel_mem_channel_property"}, supported);
735  if (ret_err != CL_SUCCESS)
736  return static_cast<pi_result>(ret_err);
737 
738  result = supported;
739  std::memcpy(paramValue, &result, sizeof(result));
740  return PI_SUCCESS;
741  }
743  bool result = false;
744  if (paramValueSize < sizeof(result))
745  return PI_ERROR_INVALID_VALUE;
746  cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
747  cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
748  sizeof(cl_device_type), &devType, nullptr);
749  if (res != CL_SUCCESS)
750  return static_cast<pi_result>(res);
751 
752  pi_uint32 vendorId = 0;
753  res = clGetDeviceInfo(cast<cl_device_id>(device), PI_DEVICE_INFO_VENDOR_ID,
754  sizeof(vendorId), &vendorId, nullptr);
755  if (res != CL_SUCCESS)
756  return static_cast<pi_result>(res);
757  // ESIMD is only supported by Intel GPUs.
758  result = devType == CL_DEVICE_TYPE_GPU && vendorId == 0x8086;
759  if (paramValue)
760  std::memcpy(paramValue, &result, sizeof(result));
761  return PI_SUCCESS;
762  }
763  default:
764  cl_int result = clGetDeviceInfo(
765  cast<cl_device_id>(device), cast<cl_device_info>(paramName),
766  paramValueSize, paramValue, paramValueSizeRet);
767  return static_cast<pi_result>(result);
768  }
769 }
770 
772  pi_uint32 *num_platforms) {
773  cl_int result = clGetPlatformIDs(cast<cl_uint>(num_entries),
774  cast<cl_platform_id *>(platforms),
775  cast<cl_uint *>(num_platforms));
776 
777  // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms
778  if (result == CL_PLATFORM_NOT_FOUND_KHR) {
779  assert(num_platforms != 0);
780  *num_platforms = 0;
781  result = PI_SUCCESS;
782  }
783  return static_cast<pi_result>(result);
784 }
785 
787  size_t paramValueSize, void *paramValue,
788  size_t *paramValueSizeRet) {
789 
790  switch (paramName) {
793  if (paramValue) {
794  if (paramValueSize < sizeof(result))
795  return PI_ERROR_INVALID_VALUE;
796  std::memcpy(paramValue, &result, sizeof(result));
797  }
798  if (paramValueSizeRet)
799  *paramValueSizeRet = sizeof(result);
800  return PI_SUCCESS;
801  }
802  default: {
803  cl_int result = clGetPlatformInfo(
804  cast<cl_platform_id>(platform), cast<cl_platform_info>(paramName),
805  paramValueSize, paramValue, paramValueSizeRet);
806  return static_cast<pi_result>(result);
807  }
808  }
809  return PI_SUCCESS;
810 }
811 
813  pi_platform *platform) {
814  assert(platform);
815  assert(nativeHandle);
816  *platform = reinterpret_cast<pi_platform>(nativeHandle);
817  return PI_SUCCESS;
818 }
819 
821  pi_uint32 num_entries, pi_device *devices,
822  pi_uint32 *num_devices) {
823  cl_int result = clGetDeviceIDs(
824  cast<cl_platform_id>(platform), cast<cl_device_type>(device_type),
825  cast<cl_uint>(num_entries), cast<cl_device_id *>(devices),
826  cast<cl_uint *>(num_devices));
827 
828  // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices
829  if (result == CL_DEVICE_NOT_FOUND) {
830  assert(num_devices != 0);
831  *num_devices = 0;
832  result = PI_SUCCESS;
833  }
834  return cast<pi_result>(result);
835 }
836 
838  pi_uint32 num_images,
839  pi_uint32 *selected_image_ind) {
840 
841  // TODO: this is a bare-bones implementation for choosing a device image
842  // that would be compatible with the targeted device. An AOT-compiled
843  // image is preferred over SPIR-V for known devices (i.e. Intel devices)
844  // The implementation makes no effort to differentiate between multiple images
845  // for the given device, and simply picks the first one compatible
846  // Real implementation will use the same mechanism OpenCL ICD dispatcher
847  // uses. Something like:
848  // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT);
849  // return context->dispatch->piextDeviceSelectIR(
850  // ctx, images, num_images, selected_image);
851  // where context->dispatch is set to the dispatch table provided by PI
852  // plugin for platform/device the ctx was created for.
853 
854  // Choose the binary target for the provided device
855  const char *image_target = nullptr;
856  // Get the type of the device
857  cl_device_type device_type;
858  constexpr pi_uint32 invalid_ind = std::numeric_limits<pi_uint32>::max();
859  cl_int ret_err =
860  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
861  sizeof(cl_device_type), &device_type, nullptr);
862  if (ret_err != CL_SUCCESS) {
863  *selected_image_ind = invalid_ind;
864  return cast<pi_result>(ret_err);
865  }
866 
867  switch (device_type) {
868  // TODO: Factor out vendor specifics into a separate source
869  // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp?
870 
871  // We'll attempt to find an image that was AOT-compiled
872  // from a SPIR-V image into an image specific for:
873 
874  case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU
876  break;
877  case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU
879  break;
880  case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA
882  break;
883  default:
884  // Otherwise, we'll attempt to find and JIT-compile
885  // a device-independent SPIR-V image
887  break;
888  }
889 
890  // Find the appropriate device image, fallback to spirv if not found
891  pi_uint32 fallback = invalid_ind;
892  for (pi_uint32 i = 0; i < num_images; ++i) {
893  if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
894  *selected_image_ind = i;
895  return PI_SUCCESS;
896  }
897  if (strcmp(images[i]->DeviceTargetSpec,
899  fallback = i;
900  }
901  // Points to a spirv image, if such indeed was found
902  if ((*selected_image_ind = fallback) != invalid_ind)
903  return PI_SUCCESS;
904  // No image can be loaded for the given device
905  return PI_ERROR_INVALID_BINARY;
906 }
907 
909  pi_platform, pi_device *piDevice) {
910  assert(piDevice != nullptr);
911  *piDevice = reinterpret_cast<pi_device>(nativeHandle);
912  return PI_SUCCESS;
913 }
914 
916  pi_queue_properties *Properties, pi_queue *Queue) {
917  assert(Properties);
918  // Expect flags mask to be passed first.
919  assert(Properties[0] == PI_QUEUE_FLAGS);
920  if (Properties[0] != PI_QUEUE_FLAGS)
921  return PI_ERROR_INVALID_VALUE;
922  pi_queue_properties Flags = Properties[1];
923  // Extra data isn't supported yet.
924  assert(Properties[2] == 0);
925  if (Properties[2] != 0)
926  return PI_ERROR_INVALID_VALUE;
927  return piQueueCreate(Context, Device, Flags, Queue);
928 }
930  pi_queue_properties properties, pi_queue *queue) {
931  assert(queue && "piQueueCreate failed, queue argument is null");
932 
933  cl_platform_id curPlatform;
934  cl_int ret_err =
935  clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_PLATFORM,
936  sizeof(cl_platform_id), &curPlatform, nullptr);
937 
938  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
939 
940  // Check that unexpected bits are not set.
941  assert(!(properties &
948 
949  // Properties supported by OpenCL backend.
950  cl_command_queue_properties SupportByOpenCL =
951  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE |
952  CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT;
953 
954  OCLV::OpenCLVersion version;
955  ret_err = getPlatformVersion(curPlatform, version);
956 
957  CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err);
958 
959  if (version >= OCLV::V2_0) {
960  *queue = cast<pi_queue>(clCreateCommandQueue(
961  cast<cl_context>(context), cast<cl_device_id>(device),
962  cast<cl_command_queue_properties>(properties) & SupportByOpenCL,
963  &ret_err));
964  return cast<pi_result>(ret_err);
965  }
966 
967  cl_queue_properties CreationFlagProperties[] = {
968  CL_QUEUE_PROPERTIES,
969  cast<cl_command_queue_properties>(properties) & SupportByOpenCL, 0};
970  *queue = cast<pi_queue>(clCreateCommandQueueWithProperties(
971  cast<cl_context>(context), cast<cl_device_id>(device),
972  CreationFlagProperties, &ret_err));
973  return cast<pi_result>(ret_err);
974 }
975 
977  size_t param_value_size, void *param_value,
978  size_t *param_value_size_ret) {
979  if (queue == nullptr) {
980  return PI_ERROR_INVALID_QUEUE;
981  }
982 
983  switch (param_name) {
985  // OpenCL doesn't provide API to check the status of the queue.
986  return PI_ERROR_INVALID_VALUE;
987  default:
988  cl_int CLErr = clGetCommandQueueInfo(
989  cast<cl_command_queue>(queue), cast<cl_command_queue_info>(param_name),
990  param_value_size, param_value, param_value_size_ret);
991  if (CLErr != CL_SUCCESS) {
992  return cast<pi_result>(CLErr);
993  }
994  }
995  return PI_SUCCESS;
996 }
997 
999  int32_t NativeHandleDesc, pi_context,
1000  pi_device, bool ownNativeHandle,
1001  pi_queue_properties *Properties,
1002  pi_queue *piQueue) {
1003  (void)NativeHandleDesc;
1004  (void)ownNativeHandle;
1005  (void)Properties;
1006  assert(piQueue != nullptr);
1007  *piQueue = reinterpret_cast<pi_queue>(nativeHandle);
1008  clRetainCommandQueue(cast<cl_command_queue>(nativeHandle));
1009  return PI_SUCCESS;
1010 }
1011 
1012 pi_result piProgramCreate(pi_context context, const void *il, size_t length,
1013  pi_program *res_program) {
1014  cl_uint deviceCount;
1015  cl_int ret_err =
1016  clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
1017  sizeof(cl_uint), &deviceCount, nullptr);
1018 
1019  std::vector<cl_device_id> devicesInCtx(deviceCount);
1020 
1021  if (ret_err != CL_SUCCESS || deviceCount < 1) {
1022  if (res_program != nullptr)
1023  *res_program = nullptr;
1024  return cast<pi_result>(CL_INVALID_CONTEXT);
1025  }
1026 
1027  ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
1028  deviceCount * sizeof(cl_device_id),
1029  devicesInCtx.data(), nullptr);
1030 
1031  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1032 
1033  cl_platform_id curPlatform;
1034  ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM,
1035  sizeof(cl_platform_id), &curPlatform, nullptr);
1036 
1037  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1038 
1039  OCLV::OpenCLVersion platVer;
1040  ret_err = getPlatformVersion(curPlatform, platVer);
1041 
1042  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1043 
1044  pi_result err = PI_SUCCESS;
1045  if (platVer >= OCLV::V2_1) {
1046 
1047  /* Make sure all devices support CL 2.1 or newer as well. */
1048  for (cl_device_id dev : devicesInCtx) {
1049  OCLV::OpenCLVersion devVer;
1050 
1051  ret_err = getDeviceVersion(dev, devVer);
1052  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1053 
1054  /* If the device does not support CL 2.1 or greater, we need to make sure
1055  * it supports the cl_khr_il_program extension.
1056  */
1057  if (devVer < OCLV::V2_1) {
1058  bool supported = false;
1059 
1060  ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported);
1061  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1062 
1063  if (!supported)
1064  return cast<pi_result>(CL_INVALID_OPERATION);
1065  }
1066  }
1067  if (res_program != nullptr)
1068  *res_program = cast<pi_program>(clCreateProgramWithIL(
1069  cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
1070  return err;
1071  }
1072 
1073  /* If none of the devices conform with CL 2.1 or newer make sure they all
1074  * support the cl_khr_il_program extension.
1075  */
1076  for (cl_device_id dev : devicesInCtx) {
1077  bool supported = false;
1078 
1079  ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported);
1080  CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT);
1081 
1082  if (!supported)
1083  return cast<pi_result>(CL_INVALID_OPERATION);
1084  }
1085 
1086  using apiFuncT =
1087  cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
1088  apiFuncT funcPtr =
1089  reinterpret_cast<apiFuncT>(clGetExtensionFunctionAddressForPlatform(
1090  curPlatform, "clCreateProgramWithILKHR"));
1091 
1092  assert(funcPtr != nullptr);
1093  if (res_program != nullptr)
1094  *res_program = cast<pi_program>(
1095  funcPtr(cast<cl_context>(context), il, length, cast<cl_int *>(&err)));
1096  else
1097  err = PI_ERROR_INVALID_VALUE;
1098 
1099  return err;
1100 }
1101 
1103  pi_context, bool,
1104  pi_program *piProgram) {
1105  assert(piProgram != nullptr);
1106  *piProgram = reinterpret_cast<pi_program>(nativeHandle);
1107  return PI_SUCCESS;
1108 }
1109 
1111  const pi_sampler_properties *sampler_properties,
1112  pi_sampler *result_sampler) {
1113  // Initialize properties according to OpenCL 2.1 spec.
1114  pi_result error_code;
1115  pi_bool normalizedCoords = PI_TRUE;
1118 
1119  // Unpack sampler properties
1120  for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0;
1121  ++i) {
1122  if (sampler_properties[i] == PI_SAMPLER_INFO_NORMALIZED_COORDS) {
1123  normalizedCoords = static_cast<pi_bool>(sampler_properties[++i]);
1124  } else if (sampler_properties[i] == PI_SAMPLER_INFO_ADDRESSING_MODE) {
1125  addressingMode =
1126  static_cast<pi_sampler_addressing_mode>(sampler_properties[++i]);
1127  } else if (sampler_properties[i] == PI_SAMPLER_INFO_FILTER_MODE) {
1128  filterMode = static_cast<pi_sampler_filter_mode>(sampler_properties[++i]);
1129  } else {
1130  assert(false && "Cannot recognize sampler property");
1131  }
1132  }
1133 
1134  // Always call OpenCL 1.0 API
1135  *result_sampler = cast<pi_sampler>(
1136  clCreateSampler(cast<cl_context>(context), normalizedCoords,
1137  addressingMode, filterMode, cast<cl_int *>(&error_code)));
1138  return error_code;
1139 }
1140 
1142  const pi_mem_obj_property *arg_properties,
1143  const pi_mem *arg_value) {
1144  std::ignore = arg_properties;
1145  return cast<pi_result>(
1146  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
1147  sizeof(arg_value), cast<const cl_mem *>(arg_value)));
1148 }
1149 
1151  const pi_sampler *arg_value) {
1152  return cast<pi_result>(
1153  clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
1154  sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
1155 }
1156 
1158  pi_context, pi_program, bool,
1159  pi_kernel *piKernel) {
1160  assert(piKernel != nullptr);
1161  *piKernel = reinterpret_cast<pi_kernel>(nativeHandle);
1162  return PI_SUCCESS;
1163 }
1164 
// Splits str into the tokens that lie between occurrences of delimiter and
// compares each token with sub_str.
// Returns true as soon as one token equals sub_str, false otherwise.
// A token that is terminated by a delimiter is compared even when it is
// empty; the trailing token (after the last delimiter) is compared only when
// it is non-empty.
static bool is_in_separated_string(const std::string &str, char delimiter,
                                   const std::string &sub_str) {
  std::string::size_type tokenStart = 0;

  // Walk over every delimiter-terminated token.
  for (std::string::size_type tokenEnd = str.find(delimiter, tokenStart);
       tokenEnd != std::string::npos;
       tokenEnd = str.find(delimiter, tokenStart)) {
    if (str.compare(tokenStart, tokenEnd - tokenStart, sub_str) == 0)
      return true;
    tokenStart = tokenEnd + 1;
  }

  // Whatever follows the last delimiter forms the final token.
  const std::string::size_type tailLength = str.size() - tokenStart;
  return tailLength != 0 && str.compare(tokenStart, tailLength, sub_str) == 0;
}
1190 
1192  const char *func_name,
1193  pi_uint64 *function_pointer_ret) {
1194 
1195  cl_context CLContext = nullptr;
1196  cl_int ret_err =
1197  clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
1198  sizeof(CLContext), &CLContext, nullptr);
1199 
1200  if (ret_err != CL_SUCCESS)
1201  return cast<pi_result>(ret_err);
1202 
1203  clGetDeviceFunctionPointer_fn FuncT = nullptr;
1204  ret_err = getExtFuncFromContext<clGetDeviceFunctionPointer_fn>(
1207 
1208  pi_result pi_ret_err = PI_SUCCESS;
1209 
1210  // Check if kernel name exists, to prevent opencl runtime throwing exception
1211  // with cpu runtime
1212  // TODO: Use fallback search method if extension does not exist once CPU
1213  // runtime no longer throws exceptions and prints messages when given
1214  // unavailable functions.
1215  *function_pointer_ret = 0;
1216  size_t Size;
1217  cl_int Res =
1218  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
1219  0, nullptr, &Size);
1220  if (Res != CL_SUCCESS)
1221  return cast<pi_result>(Res);
1222 
1223  std::string ClResult(Size, ' ');
1224  Res =
1225  clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
1226  ClResult.size(), &ClResult[0], nullptr);
1227  if (Res != CL_SUCCESS)
1228  return cast<pi_result>(Res);
1229 
1230  // Get rid of the null terminator and search for kernel_name
1231  // If the function cannot be found, return an error code to indicate that
1232  // it does not exist
1233  ClResult.pop_back();
1234  if (!is_in_separated_string(ClResult, ';', func_name))
1235  return PI_ERROR_INVALID_KERNEL_NAME;
1236 
1237  pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1238 
1239  // If clGetDeviceFunctionPointer is in list of extensions
1240  if (FuncT) {
1241  pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
1242  cast<cl_program>(program), func_name,
1243  function_pointer_ret));
1244  // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address
1245  // cannot be found even if kernel exists. As the kernel does exist return
1246  // that the address is not available
1247  if (pi_ret_err == CL_INVALID_ARG_VALUE) {
1248  *function_pointer_ret = 0;
1249  return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
1250  }
1251  }
1252  return pi_ret_err;
1253 }
1254 
1256  pi_uint32 num_devices, const pi_device *devices,
1257  void (*pfn_notify)(const char *errinfo,
1258  const void *private_info,
1259  size_t cb, void *user_data1),
1260  void *user_data, pi_context *retcontext) {
1261  pi_result ret = PI_ERROR_INVALID_OPERATION;
1262  *retcontext = cast<pi_context>(
1263  clCreateContext(properties, cast<cl_uint>(num_devices),
1264  cast<const cl_device_id *>(devices), pfn_notify,
1265  user_data, cast<cl_int *>(&ret)));
1266 
1267  return ret;
1268 }
1269 
1271  pi_uint32 num_devices,
1272  const pi_device *devices,
1273  bool ownNativeHandle,
1274  pi_context *piContext) {
1275  (void)num_devices;
1276  (void)devices;
1277  (void)ownNativeHandle;
1278  assert(piContext != nullptr);
1279  assert(ownNativeHandle == false);
1280  *piContext = reinterpret_cast<pi_context>(nativeHandle);
1281  return PI_SUCCESS;
1282 }
1283 
1285  size_t paramValueSize, void *paramValue,
1286  size_t *paramValueSizeRet) {
1287  switch (paramName) {
1291  // 2D USM memops are not supported.
1292  cl_bool result = false;
1293  std::memcpy(paramValue, &result, sizeof(cl_bool));
1294  return PI_SUCCESS;
1295  }
1300  // These queries should be dealt with in context_impl.cpp by calling the
1301  // queries of each device separately and building the intersection set.
1302  setErrorMessage("These queries should have never come here.",
1303  PI_ERROR_INVALID_ARG_VALUE);
1304  return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
1305  }
1306  default:
1307  cl_int result = clGetContextInfo(
1308  cast<cl_context>(context), cast<cl_context_info>(paramName),
1309  paramValueSize, paramValue, paramValueSizeRet);
1310  return static_cast<pi_result>(result);
1311  }
1312 }
1313 
1315  void *host_ptr, pi_mem *ret_mem,
1316  const pi_mem_properties *properties) {
1317  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1318  if (properties) {
1319  // TODO: need to check if all properties are supported by OpenCL RT and
1320  // ignore unsupported
1321  clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
1322  cl_context CLContext = cast<cl_context>(context);
1323  // First we need to look up the function pointer
1324  ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesINTEL_fn>(
1327  if (FuncPtr) {
1328  *ret_mem =
1329  cast<pi_mem>(FuncPtr(CLContext, properties, cast<cl_mem_flags>(flags),
1330  size, host_ptr, cast<cl_int *>(&ret_err)));
1331  return ret_err;
1332  }
1333  }
1334 
1335  *ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
1336  cast<cl_mem_flags>(flags), size,
1337  host_ptr, cast<cl_int *>(&ret_err)));
1338  return ret_err;
1339 }
1340 
1343  const pi_image_desc *image_desc, void *host_ptr,
1344  pi_mem *ret_mem) {
1345  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1346  *ret_mem = cast<pi_mem>(
1347  clCreateImage(cast<cl_context>(context), cast<cl_mem_flags>(flags),
1348  cast<const cl_image_format *>(image_format),
1349  cast<const cl_image_desc *>(image_desc), host_ptr,
1350  cast<cl_int *>(&ret_err)));
1351 
1352  return ret_err;
1353 }
1354 
1356  pi_buffer_create_type buffer_create_type,
1357  void *buffer_create_info, pi_mem *ret_mem) {
1358 
1359  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1360  *ret_mem = cast<pi_mem>(
1361  clCreateSubBuffer(cast<cl_mem>(buffer), cast<cl_mem_flags>(flags),
1362  cast<cl_buffer_create_type>(buffer_create_type),
1363  buffer_create_info, cast<cl_int *>(&ret_err)));
1364  return ret_err;
1365 }
1366 
1368  pi_context context,
1369  bool ownNativeHandle, pi_mem *piMem) {
1370  (void)context;
1371  (void)ownNativeHandle;
1372  assert(piMem != nullptr);
1373  *piMem = reinterpret_cast<pi_mem>(nativeHandle);
1374  return PI_SUCCESS;
1375 }
1376 
1378  pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle,
1379  const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc,
1380  pi_mem *Img) {
1381  (void)context;
1382  (void)ownNativeHandle;
1383  (void)ImageFormat;
1384  (void)ImageDesc;
1385  assert(Img != nullptr);
1386  *Img = reinterpret_cast<pi_mem>(nativeHandle);
1387  return PI_SUCCESS;
1388 }
1389 
1391  pi_context context, pi_uint32 num_devices, const pi_device *device_list,
1392  const size_t *lengths, const unsigned char **binaries,
1393  size_t num_metadata_entries, const pi_device_binary_property *metadata,
1394  pi_int32 *binary_status, pi_program *ret_program) {
1395  (void)metadata;
1396  (void)num_metadata_entries;
1397 
1398  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1399  *ret_program = cast<pi_program>(clCreateProgramWithBinary(
1400  cast<cl_context>(context), cast<cl_uint>(num_devices),
1401  cast<const cl_device_id *>(device_list), lengths, binaries,
1402  cast<cl_int *>(binary_status), cast<cl_int *>(&ret_err)));
1403  return ret_err;
1404 }
1405 
1407  const pi_device *device_list, const char *options,
1408  pi_uint32 num_input_programs,
1409  const pi_program *input_programs,
1410  void (*pfn_notify)(pi_program program, void *user_data),
1411  void *user_data, pi_program *ret_program) {
1412 
1413  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1414  *ret_program = cast<pi_program>(
1415  clLinkProgram(cast<cl_context>(context), cast<cl_uint>(num_devices),
1416  cast<const cl_device_id *>(device_list), options,
1417  cast<cl_uint>(num_input_programs),
1418  cast<const cl_program *>(input_programs),
1419  cast<void (*)(cl_program, void *)>(pfn_notify), user_data,
1420  cast<cl_int *>(&ret_err)));
1421  return ret_err;
1422 }
1423 
1424 pi_result piKernelCreate(pi_program program, const char *kernel_name,
1425  pi_kernel *ret_kernel) {
1426 
1427  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1428  *ret_kernel = cast<pi_kernel>(clCreateKernel(
1429  cast<cl_program>(program), kernel_name, cast<cl_int *>(&ret_err)));
1430  return ret_err;
1431 }
1432 
1434  pi_kernel_group_info param_name,
1435  size_t param_value_size, void *param_value,
1436  size_t *param_value_size_ret) {
1437  if (kernel == nullptr) {
1438  return PI_ERROR_INVALID_KERNEL;
1439  }
1440 
1441  switch (param_name) {
1443  return PI_ERROR_INVALID_VALUE;
1444  default:
1445  cl_int result = clGetKernelWorkGroupInfo(
1446  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1447  cast<cl_kernel_work_group_info>(param_name), param_value_size,
1448  param_value, param_value_size_ret);
1449  return static_cast<pi_result>(result);
1450  }
1451 }
1452 
1454  pi_kernel_sub_group_info param_name,
1455  size_t input_value_size,
1456  const void *input_value,
1457  size_t param_value_size, void *param_value,
1458  size_t *param_value_size_ret) {
1459  (void)param_value_size;
1460  size_t ret_val;
1461  cl_int ret_err;
1462 
1463  std::shared_ptr<void> implicit_input_value;
1464  if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE && !input_value) {
1465  // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no
1466  // value is given we use the max work item size of the device in the first
1467  // dimension to avoid truncation of max sub-group size.
1468  pi_uint32 max_dims = 0;
1469  pi_result pi_ret_err =
1471  sizeof(pi_uint32), &max_dims, nullptr);
1472  if (pi_ret_err != PI_SUCCESS)
1473  return pi_ret_err;
1474  std::shared_ptr<size_t[]> WGSizes{new size_t[max_dims]};
1475  pi_ret_err =
1477  max_dims * sizeof(size_t), WGSizes.get(), nullptr);
1478  if (pi_ret_err != PI_SUCCESS)
1479  return pi_ret_err;
1480  for (size_t i = 1; i < max_dims; ++i)
1481  WGSizes.get()[i] = 1;
1482  implicit_input_value = std::move(WGSizes);
1483  input_value_size = max_dims * sizeof(size_t);
1484  input_value = implicit_input_value.get();
1485  }
1486 
1487  ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
1488  cast<cl_kernel>(kernel), cast<cl_device_id>(device),
1489  cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
1490  sizeof(size_t), &ret_val, param_value_size_ret));
1491 
1492  if (ret_err == CL_INVALID_OPERATION) {
1493  // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does
1494  // not support subgroups.
1495 
1496  if (param_name == PI_KERNEL_MAX_NUM_SUB_GROUPS) {
1497  ret_val = 1; // Minimum required by SYCL 2020 spec
1498  ret_err = CL_SUCCESS;
1499  } else if (param_name == PI_KERNEL_COMPILE_NUM_SUB_GROUPS) {
1500  ret_val = 0; // Not specified by kernel
1501  ret_err = CL_SUCCESS;
1502  } else if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE) {
1503  // Return the maximum sub group size for the device
1504  size_t result_size = 0;
1505  // Two calls to piDeviceGetInfo are needed: the first determines the size
1506  // required to store the result, and the second returns the actual size
1507  // values.
1508  pi_result pi_ret_err =
1510  nullptr, &result_size);
1511  if (pi_ret_err != PI_SUCCESS) {
1512  return pi_ret_err;
1513  }
1514  assert(result_size % sizeof(size_t) == 0);
1515  std::vector<size_t> result(result_size / sizeof(size_t));
1517  result_size, result.data(), nullptr);
1518  if (pi_ret_err != PI_SUCCESS) {
1519  return pi_ret_err;
1520  }
1521  ret_val = *std::max_element(result.begin(), result.end());
1522  ret_err = CL_SUCCESS;
1523  } else if (param_name == PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL) {
1524  ret_val = 0; // Not specified by kernel
1525  ret_err = CL_SUCCESS;
1526  }
1527  }
1528 
1529  if (ret_err != CL_SUCCESS)
1530  return cast<pi_result>(ret_err);
1531 
1532  *(static_cast<uint32_t *>(param_value)) = static_cast<uint32_t>(ret_val);
1533  if (param_value_size_ret)
1534  *param_value_size_ret = sizeof(uint32_t);
1535  return PI_SUCCESS;
1536 }
1537 
1539 
1540  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1541  auto *cl_err = cast<cl_int *>(&ret_err);
1542 
1543  cl_event e = clCreateUserEvent(cast<cl_context>(context), cl_err);
1544  *ret_event = cast<pi_event>(e);
1545  if (*cl_err != CL_SUCCESS)
1546  return ret_err;
1547  *cl_err = clSetUserEventStatus(e, CL_COMPLETE);
1548  return ret_err;
1549 }
1550 
1552  pi_context context,
1553  bool ownNativeHandle,
1554  pi_event *piEvent) {
1555  (void)context;
1556  // TODO: ignore this, but eventually want to return error as unsupported
1557  (void)ownNativeHandle;
1558 
1559  assert(piEvent != nullptr);
1560  assert(nativeHandle);
1561  assert(context);
1562 
1563  *piEvent = reinterpret_cast<pi_event>(nativeHandle);
1564  return PI_SUCCESS;
1565 }
1566 
1568  pi_bool blocking_map, pi_map_flags map_flags,
1569  size_t offset, size_t size,
1570  pi_uint32 num_events_in_wait_list,
1571  const pi_event *event_wait_list,
1572  pi_event *event, void **ret_map) {
1573 
1574  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
1575  *ret_map = cast<void *>(clEnqueueMapBuffer(
1576  cast<cl_command_queue>(command_queue), cast<cl_mem>(buffer),
1577  cast<cl_bool>(blocking_map), map_flags, offset, size,
1578  cast<cl_uint>(num_events_in_wait_list),
1579  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event),
1580  cast<cl_int *>(&ret_err)));
1581  return ret_err;
1582 }
1583 
1584 //
1585 // USM
1586 //
1587 
1595 pi_result piextUSMHostAlloc(void **result_ptr, pi_context context,
1596  pi_usm_mem_properties *properties, size_t size,
1597  pi_uint32 alignment) {
1598 
1599  void *Ptr = nullptr;
1600  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1601 
1602  // First we need to look up the function pointer
1603  clHostMemAllocINTEL_fn FuncPtr = nullptr;
1604  cl_context CLContext = cast<cl_context>(context);
1605  RetVal = getExtFuncFromContext<clHostMemAllocINTEL_fn>(
1607  &FuncPtr);
1608 
1609  if (FuncPtr) {
1610  Ptr = FuncPtr(CLContext, cast<cl_mem_properties_intel *>(properties), size,
1611  alignment, cast<cl_int *>(&RetVal));
1612  }
1613 
1614  *result_ptr = Ptr;
1615 
1616  // ensure we aligned the allocation correctly
1617  if (RetVal == PI_SUCCESS && alignment != 0)
1618  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
1619  "allocation not aligned correctly");
1620 
1621  return RetVal;
1622 }
1623 
1632 pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context,
1633  pi_device device,
1634  pi_usm_mem_properties *properties, size_t size,
1635  pi_uint32 alignment) {
1636 
1637  void *Ptr = nullptr;
1638  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1639 
1640  // First we need to look up the function pointer
1641  clDeviceMemAllocINTEL_fn FuncPtr = nullptr;
1642  cl_context CLContext = cast<cl_context>(context);
1643  RetVal = getExtFuncFromContext<clDeviceMemAllocINTEL_fn>(
1645  clDeviceMemAllocName, &FuncPtr);
1646 
1647  if (FuncPtr) {
1648  Ptr = FuncPtr(CLContext, cast<cl_device_id>(device),
1649  cast<cl_mem_properties_intel *>(properties), size, alignment,
1650  cast<cl_int *>(&RetVal));
1651  }
1652 
1653  *result_ptr = Ptr;
1654 
1655  // ensure we aligned the allocation correctly
1656  if (RetVal == PI_SUCCESS && alignment != 0)
1657  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0 &&
1658  "allocation not aligned correctly");
1659 
1660  return RetVal;
1661 }
1662 
1671 pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context,
1672  pi_device device,
1673  pi_usm_mem_properties *properties, size_t size,
1674  pi_uint32 alignment) {
1675 
1676  void *Ptr = nullptr;
1677  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1678 
1679  // First we need to look up the function pointer
1680  clSharedMemAllocINTEL_fn FuncPtr = nullptr;
1681  cl_context CLContext = cast<cl_context>(context);
1682  RetVal = getExtFuncFromContext<clSharedMemAllocINTEL_fn>(
1684  clSharedMemAllocName, &FuncPtr);
1685 
1686  if (FuncPtr) {
1687  Ptr = FuncPtr(cast<cl_context>(context), cast<cl_device_id>(device),
1688  cast<cl_mem_properties_intel *>(properties), size, alignment,
1689  cast<cl_int *>(&RetVal));
1690  }
1691 
1692  *result_ptr = Ptr;
1693 
1694  assert(alignment == 0 ||
1695  (RetVal == PI_SUCCESS &&
1696  reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
1697  return RetVal;
1698 }
1699 
1704 pi_result piextUSMFree(pi_context context, void *ptr) {
1705  // Use a blocking free to avoid issues with indirect access from kernels that
1706  // might be still running.
1707  clMemBlockingFreeINTEL_fn FuncPtr = nullptr;
1708 
1709  cl_context CLContext = cast<cl_context>(context);
1710  pi_result RetVal = PI_ERROR_INVALID_OPERATION;
1711  RetVal = getExtFuncFromContext<clMemBlockingFreeINTEL_fn>(
1713  clMemBlockingFreeName, &FuncPtr);
1714 
1715  if (FuncPtr) {
1716  RetVal = cast<pi_result>(FuncPtr(CLContext, ptr));
1717  }
1718 
1719  return RetVal;
1720 }
1721 
1730  size_t arg_size, const void *arg_value) {
1731  (void)arg_size;
1732 
1733  // Size is unused in CL as pointer args are passed by value.
1734 
1735  // Have to look up the context from the kernel
1736  cl_context CLContext;
1737  cl_int CLErr = clGetKernelInfo(cast<cl_kernel>(kernel), CL_KERNEL_CONTEXT,
1738  sizeof(cl_context), &CLContext, nullptr);
1739  if (CLErr != CL_SUCCESS) {
1740  return cast<pi_result>(CLErr);
1741  }
1742 
1743  clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr;
1744  pi_result RetVal = getExtFuncFromContext<clSetKernelArgMemPointerINTEL_fn>(
1746  clSetKernelArgMemPointerName, &FuncPtr);
1747 
1748  if (FuncPtr) {
1749  // OpenCL passes pointers by value not by reference
1750  // This means we need to deref the arg to get the pointer value
1751  auto PtrToPtr = reinterpret_cast<const intptr_t *>(arg_value);
1752  auto DerefPtr = reinterpret_cast<void *>(*PtrToPtr);
1753  RetVal =
1754  cast<pi_result>(FuncPtr(cast<cl_kernel>(kernel), arg_index, DerefPtr));
1755  }
1756 
1757  return RetVal;
1758 }
1759 
1771  size_t count, pi_uint32 num_events_in_waitlist,
1772  const pi_event *events_waitlist,
1773  pi_event *event) {
1774 
1775  // Have to look up the context from the queue
1776  cl_context CLContext;
1777  cl_int CLErr =
1778  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1779  sizeof(cl_context), &CLContext, nullptr);
1780  if (CLErr != CL_SUCCESS) {
1781  return cast<pi_result>(CLErr);
1782  }
1783 
1784  clEnqueueMemFillINTEL_fn FuncPtr = nullptr;
1785  pi_result RetVal = getExtFuncFromContext<clEnqueueMemFillINTEL_fn>(
1787  clEnqueueMemFillName, &FuncPtr);
1788 
1789  if (FuncPtr) {
1790  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue), ptr, &value,
1791  1, count, num_events_in_waitlist,
1792  cast<const cl_event *>(events_waitlist),
1793  cast<cl_event *>(event)));
1794  }
1795 
1796  return RetVal;
1797 }
1798 
1809 pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr,
1810  const void *src_ptr, size_t size,
1811  pi_uint32 num_events_in_waitlist,
1812  const pi_event *events_waitlist,
1813  pi_event *event) {
1814 
1815  // Have to look up the context from the queue
1816  cl_context CLContext;
1817  cl_int CLErr =
1818  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1819  sizeof(cl_context), &CLContext, nullptr);
1820  if (CLErr != CL_SUCCESS) {
1821  return cast<pi_result>(CLErr);
1822  }
1823 
1824  clEnqueueMemcpyINTEL_fn FuncPtr = nullptr;
1825  pi_result RetVal = getExtFuncFromContext<clEnqueueMemcpyINTEL_fn>(
1827  clEnqueueMemcpyName, &FuncPtr);
1828 
1829  if (FuncPtr) {
1830  RetVal = cast<pi_result>(
1831  FuncPtr(cast<cl_command_queue>(queue), blocking, dst_ptr, src_ptr, size,
1832  num_events_in_waitlist, cast<const cl_event *>(events_waitlist),
1833  cast<cl_event *>(event)));
1834  }
1835 
1836  return RetVal;
1837 }
1838 
1848 pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
1849  pi_usm_migration_flags flags,
1850  pi_uint32 num_events_in_waitlist,
1851  const pi_event *events_waitlist,
1852  pi_event *event) {
1853  (void)ptr;
1854  (void)size;
1855 
1856  // flags is currently unused so fail if set
1857  if (flags != 0)
1858  return PI_ERROR_INVALID_VALUE;
1859 
1860  return cast<pi_result>(clEnqueueMarkerWithWaitList(
1861  cast<cl_command_queue>(queue), num_events_in_waitlist,
1862  cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
1863 
1864  /*
1865  // Use this once impls support it.
1866  // Have to look up the context from the kernel
1867  cl_context CLContext;
1868  cl_int CLErr =
1869  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
1870  sizeof(cl_context), &CLContext, nullptr);
1871  if (CLErr != CL_SUCCESS) {
1872  return cast<pi_result>(CLErr);
1873  }
1874 
1875  clEnqueueMigrateMemINTEL_fn FuncPtr;
1876  pi_result Err = getExtFuncFromContext<clEnqueueMigrateMemINTEL_fn>(
1877  cast<pi_context>(CLContext), "clEnqueueMigrateMemINTEL", &FuncPtr);
1878 
1879  if (Err != PI_SUCCESS) {
1880  RetVal = Err;
1881  } else {
1882  RetVal = cast<pi_result>(FuncPtr(
1883  cast<cl_command_queue>(queue), ptr, size, flags, num_events_in_waitlist,
1884  reinterpret_cast<const cl_event *>(events_waitlist),
1885  reinterpret_cast<cl_event *>(event)));
1886  }
1887  */
1888 }
1889 
1897 // USM memadvise API to govern behavior of automatic migration mechanisms
1899  size_t length, pi_mem_advice advice,
1900  pi_event *event) {
1901  (void)ptr;
1902  (void)length;
1903  (void)advice;
1904 
1905  return cast<pi_result>(
1906  clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,
1907  reinterpret_cast<cl_event *>(event)));
1908 
1909  /*
1910  // Change to use this once drivers support it.
1911 
1912  // Have to look up the context from the kernel
1913  cl_context CLContext;
1914  cl_int CLErr = clGetCommandQueueInfo(cast<cl_command_queue>(queue),
1915  CL_QUEUE_CONTEXT,
1916  sizeof(cl_context),
1917  &CLContext, nullptr);
1918  if (CLErr != CL_SUCCESS) {
1919  return cast<pi_result>(CLErr);
1920  }
1921 
1922  clEnqueueMemAdviseINTEL_fn FuncPtr;
1923  pi_result Err =
1924  getExtFuncFromContext<clEnqueueMemAdviseINTEL_fn>(
1925  cast<pi_context>(CLContext), "clEnqueueMemAdviseINTEL", &FuncPtr);
1926 
1927  if (Err != PI_SUCCESS) {
1928  RetVal = Err;
1929  } else {
1930  RetVal = cast<pi_result>(FuncPtr(cast<cl_command_queue>(queue),
1931  ptr, length, advice, 0, nullptr,
1932  reinterpret_cast<cl_event *>(event)));
1933  }
1934  */
1935 }
1936 
1949 __SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr,
1950  size_t pitch, size_t pattern_size,
1951  const void *pattern, size_t width,
1952  size_t height,
1953  pi_uint32 num_events_in_waitlist,
1954  const pi_event *events_waitlist,
1955  pi_event *event) {
1956  std::ignore = queue;
1957  std::ignore = ptr;
1958  std::ignore = pitch;
1959  std::ignore = pattern_size;
1960  std::ignore = pattern;
1961  std::ignore = width;
1962  std::ignore = height;
1963  std::ignore = num_events_in_waitlist;
1964  std::ignore = events_waitlist;
1965  std::ignore = event;
1966  return PI_ERROR_INVALID_OPERATION;
1967 }
1968 
1981  pi_queue queue, void *ptr, size_t pitch, int value, size_t width,
1982  size_t height, pi_uint32 num_events_in_waitlist,
1983  const pi_event *events_waitlist, pi_event *event) {
1984  std::ignore = queue;
1985  std::ignore = ptr;
1986  std::ignore = pitch;
1987  std::ignore = value;
1988  std::ignore = width;
1989  std::ignore = height;
1990  std::ignore = num_events_in_waitlist;
1991  std::ignore = events_waitlist;
1992  std::ignore = event;
1993  return PI_ERROR_INVALID_OPERATION;
1994 }
1995 
2011  pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch,
2012  const void *src_ptr, size_t src_pitch, size_t width, size_t height,
2013  pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist,
2014  pi_event *event) {
2015  std::ignore = queue;
2016  std::ignore = blocking;
2017  std::ignore = dst_ptr;
2018  std::ignore = dst_pitch;
2019  std::ignore = src_ptr;
2020  std::ignore = src_pitch;
2021  std::ignore = width;
2022  std::ignore = height;
2023  std::ignore = num_events_in_waitlist;
2024  std::ignore = events_waitlist;
2025  std::ignore = event;
2026  return PI_ERROR_INVALID_OPERATION;
2027 }
2028 
2046  pi_mem_alloc_info param_name,
2047  size_t param_value_size, void *param_value,
2048  size_t *param_value_size_ret) {
2049 
2050  clGetMemAllocInfoINTEL_fn FuncPtr = nullptr;
2051  cl_context CLContext = cast<cl_context>(context);
2052  pi_result RetVal = getExtFuncFromContext<clGetMemAllocInfoINTEL_fn>(
2054  clGetMemAllocInfoName, &FuncPtr);
2055 
2056  if (FuncPtr) {
2057  RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr, param_name,
2058  param_value_size, param_value,
2059  param_value_size_ret));
2060  }
2061 
2062  return RetVal;
2063 }
2064 
2065 pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) {
2066  std::ignore = ptr;
2067  std::ignore = size;
2068  std::ignore = context;
2069  return PI_SUCCESS;
2070 }
2071 
2072 pi_result piextUSMRelease(const void *ptr, pi_context context) {
2073  std::ignore = ptr;
2074  std::ignore = context;
2075  return PI_SUCCESS;
2076 }
2077 
2092  pi_queue queue, pi_program program, const char *name,
2093  pi_bool blocking_write, size_t count, size_t offset, const void *src,
2094  pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
2095  pi_event *event) {
  // The extension entry point is looked up per context, so first ask the
  // queue which context it belongs to.
2096  cl_context Ctx = nullptr;
2097  cl_int Res =
2098  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2099  sizeof(Ctx), &Ctx, nullptr);
2100 
  // A failed context query is reported with the OpenCL error code as-is.
2101  if (Res != CL_SUCCESS)
2102  return cast<pi_result>(Res);
2103 
2104  clEnqueueWriteGlobalVariable_fn F = nullptr;
2105  Res = getExtFuncFromContext<decltype(F)>(
2108 
  // Any lookup failure (null pointer or non-success code) is collapsed into
  // PI_ERROR_INVALID_OPERATION; the lookup's own code is not propagated.
2109  if (!F || Res != CL_SUCCESS)
2110  return PI_ERROR_INVALID_OPERATION;
  // Forward the write request unchanged; PI handles are cast straight to
  // their OpenCL counterparts.
2111  Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
2112  blocking_write, count, offset, src, num_events_in_wait_list,
2113  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
2114  return cast<pi_result>(Res);
2115 }
2116 
2131  pi_queue queue, pi_program program, const char *name, pi_bool blocking_read,
2132  size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list,
2133  const pi_event *event_wait_list, pi_event *event) {
  // The extension entry point is looked up per context, so first ask the
  // queue which context it belongs to.
2134  cl_context Ctx = nullptr;
2135  cl_int Res =
2136  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2137  sizeof(Ctx), &Ctx, nullptr);
2138 
  // A failed context query is reported with the OpenCL error code as-is.
2139  if (Res != CL_SUCCESS)
2140  return cast<pi_result>(Res);
2141 
2142  clEnqueueReadGlobalVariable_fn F = nullptr;
2143  Res = getExtFuncFromContext<decltype(F)>(
2146 
  // Any lookup failure (null pointer or non-success code) is collapsed into
  // PI_ERROR_INVALID_OPERATION; the lookup's own code is not propagated.
2147  if (!F || Res != CL_SUCCESS)
2148  return PI_ERROR_INVALID_OPERATION;
  // Forward the read request unchanged; PI handles are cast straight to
  // their OpenCL counterparts.
2149  Res = F(cast<cl_command_queue>(queue), cast<cl_program>(program), name,
2150  blocking_read, count, offset, dst, num_events_in_wait_list,
2151  cast<const cl_event *>(event_wait_list), cast<cl_event *>(event));
2152  return cast<pi_result>(Res);
2153 }
2154 
2156  const char *pipe_symbol, pi_bool blocking,
2157  void *ptr, size_t size,
2158  pi_uint32 num_events_in_waitlist,
2159  const pi_event *events_waitlist,
2160  pi_event *event) {
  // Ask the queue for its context; CLContext is only written by this query.
2161  cl_context CLContext;
2162  cl_int CLErr =
2163  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2164  sizeof(cl_context), &CLContext, nullptr);
2165  if (CLErr != CL_SUCCESS) {
2166  return cast<pi_result>(CLErr);
2167  }
2168 
  // Look up the extension entry point of type clEnqueueReadHostPipeINTEL_fn.
2169  clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
2170  pi_result RetVal = getExtFuncFromContext<clEnqueueReadHostPipeINTEL_fn>(
2172  clEnqueueReadHostPipeName, &FuncPtr);
2173 
  // Only call through when an entry point was obtained; otherwise the result
  // of the lookup itself is returned.
2174  if (FuncPtr) {
2175  RetVal = cast<pi_result>(FuncPtr(
2176  cast<cl_command_queue>(queue), cast<cl_program>(program), pipe_symbol,
2177  blocking, ptr, size, num_events_in_waitlist,
2178  cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
2179  }
2180 
2181  return RetVal;
2182 }
2183 
2185  const char *pipe_symbol, pi_bool blocking,
2186  void *ptr, size_t size,
2187  pi_uint32 num_events_in_waitlist,
2188  const pi_event *events_waitlist,
2189  pi_event *event) {
  // Ask the queue for its context; CLContext is only written by this query.
2190  cl_context CLContext;
2191  cl_int CLErr =
2192  clGetCommandQueueInfo(cast<cl_command_queue>(queue), CL_QUEUE_CONTEXT,
2193  sizeof(cl_context), &CLContext, nullptr);
2194  if (CLErr != CL_SUCCESS) {
2195  return cast<pi_result>(CLErr);
2196  }
2197 
  // Look up the extension entry point of type clEnqueueWriteHostPipeINTEL_fn.
2198  clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
2199  pi_result RetVal = getExtFuncFromContext<clEnqueueWriteHostPipeINTEL_fn>(
2201  clEnqueueWriteHostPipeName, &FuncPtr);
2202 
  // Only call through when an entry point was obtained; otherwise the result
  // of the lookup itself is returned.
2203  if (FuncPtr) {
2204  RetVal = cast<pi_result>(FuncPtr(
2205  cast<cl_command_queue>(queue), cast<cl_program>(program), pipe_symbol,
2206  blocking, ptr, size, num_events_in_waitlist,
2207  cast<const cl_event *>(events_waitlist), cast<cl_event *>(event)));
2208  }
2209 
2210  return RetVal;
2211 }
2212 
2225  size_t param_value_size,
2226  const void *param_value) {
  // PI_USM_INDIRECT_ACCESS set to PI_TRUE is handled by USMSetIndirectAccess;
  // every other request (including PI_USM_INDIRECT_ACCESS with a different
  // value) is passed through to clSetKernelExecInfo unchanged.
  //
  // NOTE(review): param_value is read as a pi_bool without checking it for
  // null or checking that param_value_size covers sizeof(pi_bool). A caller
  // passing a null or undersized buffer with PI_USM_INDIRECT_ACCESS triggers
  // an invalid read here; consider validating both before the dereference.
2227  if (param_name == PI_USM_INDIRECT_ACCESS &&
2228  *(static_cast<const pi_bool *>(param_value)) == PI_TRUE) {
2229  return USMSetIndirectAccess(kernel);
2230  } else {
2231  return cast<pi_result>(clSetKernelExecInfo(
2232  cast<cl_kernel>(kernel), param_name, param_value_size, param_value));
2233  }
2234 }
2235 
2237  pi_uint32 spec_id,
2238  size_t spec_size,
2239  const void *spec_value) {
  // The extension entry point is looked up per context, so first ask the
  // program which context it belongs to.
2240  cl_program ClProg = cast<cl_program>(prog);
2241  cl_context Ctx = nullptr;
  // RetSize receives the size reported by the query; it is not read again in
  // the lines visible here.
2242  size_t RetSize = 0;
2243  cl_int Res =
2244  clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize);
2245 
  // A failed context query is reported with the OpenCL error code as-is.
2246  if (Res != CL_SUCCESS)
2247  return cast<pi_result>(Res);
2248 
2250  Res = getExtFuncFromContext<decltype(F)>(
2253 
  // Any lookup failure (null pointer or non-success code) is collapsed into
  // PI_ERROR_INVALID_OPERATION; the lookup's own code is not propagated.
2254  if (!F || Res != CL_SUCCESS)
2255  return PI_ERROR_INVALID_OPERATION;
2256  Res = F(ClProg, spec_id, spec_size, spec_value);
2257  return cast<pi_result>(Res);
2258 }
2259 
2266 static pi_result piextGetNativeHandle(void *piObj,
2267  pi_native_handle *nativeHandle) {
2268  assert(nativeHandle != nullptr);
2269  *nativeHandle = reinterpret_cast<pi_native_handle>(piObj);
2270  return PI_SUCCESS;
2271 }
2272 
2274  pi_native_handle *nativeHandle) {
2275  return piextGetNativeHandle(platform, nativeHandle);
2276 }
2277 
2279  pi_native_handle *nativeHandle) {
2280  return piextGetNativeHandle(device, nativeHandle);
2281 }
2282 
2284  pi_native_handle *nativeHandle) {
2285  return piextGetNativeHandle(context, nativeHandle);
2286 }
2287 
2289  pi_native_handle *nativeHandle,
2290  int32_t *nativeHandleDesc) {
  // The descriptor is always reported as 0 by this plugin.
  // NOTE(review): nativeHandleDesc is written unconditionally; a caller
  // passing null crashes here, whereas nativeHandle is at least asserted by
  // the shared helper. Consider guarding this store.
2291  *nativeHandleDesc = 0;
2292  return piextGetNativeHandle(queue, nativeHandle);
2293 }
2294 
2296  return piextGetNativeHandle(mem, nativeHandle);
2297 }
2298 
2300  pi_native_handle *nativeHandle) {
2301  return piextGetNativeHandle(program, nativeHandle);
2302 }
2303 
2305  pi_native_handle *nativeHandle) {
2306  return piextGetNativeHandle(kernel, nativeHandle);
2307 }
2308 
2309 // command-buffer extension
2311  const pi_ext_command_buffer_desc *desc,
2312  pi_ext_command_buffer *ret_command_buffer) {
2313  (void)context;
2314  (void)device;
2315  (void)desc;
2316  (void)ret_command_buffer;
2317 
2318  // Not implemented
2319  return PI_ERROR_INVALID_OPERATION;
2320 }
2321 
2323  (void)command_buffer;
2324 
2325  // Not implemented
2326  return PI_ERROR_INVALID_OPERATION;
2327 }
2328 
2330  (void)command_buffer;
2331 
2332  // Not implemented
2333  return PI_ERROR_INVALID_OPERATION;
2334 }
2335 
2337  (void)command_buffer;
2338 
2339  // Not implemented
2340  return PI_ERROR_INVALID_OPERATION;
2341 }
2342 
2344  pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
2345  const size_t *global_work_offset, const size_t *global_work_size,
2346  const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
2347  const pi_ext_sync_point *sync_point_wait_list,
2348  pi_ext_sync_point *sync_point) {
2349  (void)command_buffer;
2350  (void)kernel;
2351  (void)work_dim;
2352  (void)global_work_offset;
2353  (void)global_work_size;
2354  (void)local_work_size;
2355  (void)num_sync_points_in_wait_list;
2356  (void)sync_point_wait_list;
2357  (void)sync_point;
2358 
2359  // Not implemented
2360  return PI_ERROR_INVALID_OPERATION;
2361 }
2362 
2363 pi_result
2365  const void *src_ptr, size_t size,
2366  pi_uint32 num_sync_points_in_wait_list,
2367  const pi_ext_sync_point *sync_point_wait_list,
2368  pi_ext_sync_point *sync_point) {
2369  (void)command_buffer;
2370  (void)dst_ptr;
2371  (void)src_ptr;
2372  (void)size;
2373  (void)num_sync_points_in_wait_list;
2374  (void)sync_point_wait_list;
2375  (void)sync_point;
2376 
2377  // Not implemented
2378  return PI_ERROR_INVALID_OPERATION;
2379 }
2380 
2382  pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2383  size_t src_offset, size_t dst_offset, size_t size,
2384  pi_uint32 num_sync_points_in_wait_list,
2385  const pi_ext_sync_point *sync_point_wait_list,
2386  pi_ext_sync_point *sync_point) {
2387  (void)command_buffer;
2388  (void)src_buffer;
2389  (void)dst_buffer;
2390  (void)src_offset;
2391  (void)dst_offset;
2392  (void)size;
2393  (void)num_sync_points_in_wait_list;
2394  (void)sync_point_wait_list;
2395  (void)sync_point;
2396 
2397  // Not implemented
2398  return PI_ERROR_INVALID_OPERATION;
2399 }
2400 
2402  pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2403  pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
2404  pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
2405  size_t dst_row_pitch, size_t dst_slice_pitch,
2406  pi_uint32 num_sync_points_in_wait_list,
2407  const pi_ext_sync_point *sync_point_wait_list,
2408  pi_ext_sync_point *sync_point) {
2409  (void)command_buffer;
2410  (void)src_buffer;
2411  (void)dst_buffer;
2412  (void)src_origin;
2413  (void)dst_origin;
2414  (void)region;
2415  (void)src_row_pitch;
2416  (void)src_slice_pitch;
2417  (void)dst_row_pitch;
2418  (void)dst_slice_pitch;
2419  (void)num_sync_points_in_wait_list;
2420  (void)sync_point_wait_list;
2421  (void)sync_point;
2422 
2423  // Not implemented
2424  return PI_ERROR_INVALID_OPERATION;
2425 }
2426 
2428  pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
2429  size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list,
2430  const pi_ext_sync_point *sync_point_wait_list,
2431  pi_ext_sync_point *sync_point) {
2432  (void)command_buffer;
2433  (void)buffer;
2434  (void)offset;
2435  (void)size;
2436  (void)dst;
2437  (void)num_sync_points_in_wait_list;
2438  (void)sync_point_wait_list;
2439  (void)sync_point;
2440 
2441  // Not implemented
2442  return PI_ERROR_INVALID_OPERATION;
2443 }
2444 
2446  pi_ext_command_buffer command_buffer, pi_mem buffer,
2447  pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
2448  pi_buff_rect_region region, size_t buffer_row_pitch,
2449  size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
2450  void *ptr, pi_uint32 num_sync_points_in_wait_list,
2451  const pi_ext_sync_point *sync_point_wait_list,
2452  pi_ext_sync_point *sync_point) {
2453  (void)command_buffer;
2454  (void)buffer;
2455  (void)buffer_offset;
2456  (void)host_offset;
2457  (void)region;
2458  (void)buffer_row_pitch;
2459  (void)buffer_slice_pitch;
2460  (void)host_row_pitch;
2461  (void)host_slice_pitch;
2462  (void)ptr;
2463  (void)num_sync_points_in_wait_list;
2464  (void)sync_point_wait_list;
2465  (void)sync_point;
2466 
2467  // Not implemented
2468  return PI_ERROR_INVALID_OPERATION;
2469 }
2470 
2472  pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
2473  size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list,
2474  const pi_ext_sync_point *sync_point_wait_list,
2475  pi_ext_sync_point *sync_point) {
2476  (void)command_buffer;
2477  (void)buffer;
2478  (void)offset;
2479  (void)size;
2480  (void)ptr;
2481  (void)num_sync_points_in_wait_list;
2482  (void)sync_point_wait_list;
2483  (void)sync_point;
2484 
2485  // Not implemented
2486  return PI_ERROR_INVALID_OPERATION;
2487 }
2488 
2490  pi_ext_command_buffer command_buffer, pi_mem buffer,
2491  pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
2492  pi_buff_rect_region region, size_t buffer_row_pitch,
2493  size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
2494  const void *ptr, pi_uint32 num_sync_points_in_wait_list,
2495  const pi_ext_sync_point *sync_point_wait_list,
2496  pi_ext_sync_point *sync_point) {
2497  (void)command_buffer;
2498  (void)buffer;
2499  (void)buffer_offset;
2500  (void)host_offset;
2501  (void)region;
2502  (void)buffer_row_pitch;
2503  (void)buffer_slice_pitch;
2504  (void)host_row_pitch;
2505  (void)host_slice_pitch;
2506  (void)ptr;
2507  (void)num_sync_points_in_wait_list;
2508  (void)sync_point_wait_list;
2509  (void)sync_point;
2510 
2511  // Not implemented
2512  return PI_ERROR_INVALID_OPERATION;
2513 }
2514 
2516  pi_queue queue,
2517  pi_uint32 num_events_in_wait_list,
2518  const pi_event *event_wait_list,
2519  pi_event *event) {
2520  (void)command_buffer;
2521  (void)queue;
2522  (void)num_events_in_wait_list;
2523  (void)event_wait_list;
2524  (void)event;
2525 
2526  // Not implemented
2527  return PI_ERROR_INVALID_OPERATION;
2528 }
2529 
2530 // This API is called by Sycl RT to notify the end of the plugin lifetime.
2531 // Windows: dynamically loaded plugins might have been unloaded already
2532 // when this is called. Sycl RT holds onto the PI plugin so it can be
2533 // called safely. But this is not transitive. If the PI plugin in turn
2534 // dynamically loaded a different DLL, that may have been unloaded.
2535 // TODO: add a global variable lifetime management code here (see
2536 // pi_level_zero.cpp for reference).
2537 pi_result piTearDown(void *PluginParameter) {
2538  (void)PluginParameter;
2539  delete ExtFuncPtrCache;
2540  ExtFuncPtrCache = nullptr;
2541  return PI_SUCCESS;
2542 }
2543 
// Reports the current device and/or host timestamp for Device through the
// OpenCL timer queries, which are only attempted when both the device and its
// platform report OpenCL 2.1 or newer.
//
// Device      PI device handle; cast directly to cl_device_id.
// DeviceTime  Optional out-parameter for the device timestamp.
// HostTime    Optional out-parameter for the host timestamp.
//
// Returns the OpenCL error (cast to pi_result) when looking up the device's
// platform or the device version fails, PI_ERROR_INVALID_OPERATION when either
// version is below 2.1, and PI_SUCCESS otherwise. If both out-parameters are
// null no timer is queried and PI_SUCCESS is returned.
2544 pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
2545  uint64_t *HostTime) {
2546  OCLV::OpenCLVersion devVer, platVer;
2547  cl_platform_id platform;
2548  cl_device_id deviceID = cast<cl_device_id>(Device);
2549 
2550  // TODO: Cache OpenCL version for each device and platform
2551  auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
2552  sizeof(cl_platform_id), &platform, nullptr);
2553  if (ret_err != CL_SUCCESS) {
2554  return cast<pi_result>(ret_err);
2555  }
2556 
2557  ret_err = getDeviceVersion(deviceID, devVer);
2558 
2559  if (ret_err != CL_SUCCESS) {
2560  return cast<pi_result>(ret_err);
2561  }
2562 
  // NOTE(review): unlike the two queries above, this result is stored but
  // never tested. If getPlatformVersion fails, platVer is compared below in
  // whatever state the failed call left it. Consider returning on
  // ret_err != CL_SUCCESS here as well.
2563  ret_err = getPlatformVersion(platform, platVer);
2564 
2565  if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) {
2567  "OpenCL version for device and/or platform is less than 2.1",
2568  PI_ERROR_INVALID_OPERATION);
2569  return PI_ERROR_INVALID_OPERATION;
2570  }
2571 
  // NOTE(review): the results of clGetDeviceAndHostTimer and clGetHostTimer
  // are discarded, so a failing timer query still ends in PI_SUCCESS below.
  // Consider propagating them to the caller.
2572  if (DeviceTime) {
  // The combined query is always given a host-time destination; `dummy`
  // absorbs it when the caller did not ask for HostTime.
2573  uint64_t dummy;
2574  clGetDeviceAndHostTimer(deviceID, DeviceTime,
2575  HostTime == nullptr ? &dummy : HostTime);
2576 
2577  } else if (HostTime) {
2578  clGetHostTimer(deviceID, HostTime);
2579  }
2580 
2581  return PI_SUCCESS;
2582 }
2583 
2585  size_t param_value_size, void *param_value,
2586  size_t *param_value_size_ret) {
  // Forward the query to OpenCL unchanged, then patch up the one value that
  // needs translating (see below).
2587  cl_int result =
2588  clGetEventInfo(reinterpret_cast<cl_event>(event), param_name,
2589  param_value_size, param_value, param_value_size_ret);
  // NOTE(review): clGetEventInfo accepts a null param_value for size-only
  // queries and still returns CL_SUCCESS. In that case the block below
  // dereferences a null pointer when param_name is
  // CL_EVENT_COMMAND_EXECUTION_STATUS. Consider also requiring
  // param_value != nullptr in this condition.
2590  if (result == CL_SUCCESS && param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
2591  // If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change
2592  // it to CL_SUBMITTED. This change is needed since
2593  // sycl::info::event::event_command_status has no equivalent to CL_QUEUED.
2594  const auto param_value_int = static_cast<cl_int *>(param_value);
2595  if (*param_value_int == CL_QUEUED) {
2596  *param_value_int = CL_SUBMITTED;
2597  }
2598  }
2599  return static_cast<pi_result>(result);
2600 }
2601 
2603 
2605  // Check that the major version matches in PiVersion and SupportedVersion
2607 
2608  // PI interface supports higher version or the same version.
2609  size_t PluginVersionSize = sizeof(PluginInit->PluginVersion);
2610  if (strlen(SupportedVersion) >= PluginVersionSize)
2611  return PI_ERROR_INVALID_VALUE;
2612  strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize);
2613 
2614 #define _PI_CL(pi_api, ocl_api) \
2615  (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api);
2616 
2617  // Platform
2623  // Device
2626  _PI_CL(piDevicePartition, clCreateSubDevices)
2627  _PI_CL(piDeviceRetain, clRetainDevice)
2628  _PI_CL(piDeviceRelease, clReleaseDevice)
2633  // Context
2636  _PI_CL(piContextRetain, clRetainContext)
2637  _PI_CL(piContextRelease, clReleaseContext)
2640  // Queue
2644  _PI_CL(piQueueFinish, clFinish)
2645  _PI_CL(piQueueFlush, clFlush)
2646  _PI_CL(piQueueRetain, clRetainCommandQueue)
2647  _PI_CL(piQueueRelease, clReleaseCommandQueue)
2650  // Memory
2653  _PI_CL(piMemGetInfo, clGetMemObjectInfo)
2654  _PI_CL(piMemImageGetInfo, clGetImageInfo)
2655  _PI_CL(piMemRetain, clRetainMemObject)
2656  _PI_CL(piMemRelease, clReleaseMemObject)
2660  // Program
2663  _PI_CL(piProgramGetInfo, clGetProgramInfo)
2664  _PI_CL(piProgramCompile, clCompileProgram)
2665  _PI_CL(piProgramBuild, clBuildProgram)
2667  _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo)
2668  _PI_CL(piProgramRetain, clRetainProgram)
2669  _PI_CL(piProgramRelease, clReleaseProgram)
2674  // Kernel
2676  _PI_CL(piKernelSetArg, clSetKernelArg)
2677  _PI_CL(piKernelGetInfo, clGetKernelInfo)
2680  _PI_CL(piKernelRetain, clRetainKernel)
2681  _PI_CL(piKernelRelease, clReleaseKernel)
2686  // Event
2689  _PI_CL(piEventGetProfilingInfo, clGetEventProfilingInfo)
2690  _PI_CL(piEventsWait, clWaitForEvents)
2691  _PI_CL(piEventSetCallback, clSetEventCallback)
2692  _PI_CL(piEventSetStatus, clSetUserEventStatus)
2693  _PI_CL(piEventRetain, clRetainEvent)
2694  _PI_CL(piEventRelease, clReleaseEvent)
2697  // Sampler
2699  _PI_CL(piSamplerGetInfo, clGetSamplerInfo)
2700  _PI_CL(piSamplerRetain, clRetainSampler)
2701  _PI_CL(piSamplerRelease, clReleaseSampler)
2702  // Queue commands
2703  _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel)
2704  _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList)
2705  _PI_CL(piEnqueueEventsWaitWithBarrier, clEnqueueBarrierWithWaitList)
2706  _PI_CL(piEnqueueMemBufferRead, clEnqueueReadBuffer)
2707  _PI_CL(piEnqueueMemBufferReadRect, clEnqueueReadBufferRect)
2708  _PI_CL(piEnqueueMemBufferWrite, clEnqueueWriteBuffer)
2709  _PI_CL(piEnqueueMemBufferWriteRect, clEnqueueWriteBufferRect)
2710  _PI_CL(piEnqueueMemBufferCopy, clEnqueueCopyBuffer)
2711  _PI_CL(piEnqueueMemBufferCopyRect, clEnqueueCopyBufferRect)
2712  _PI_CL(piEnqueueMemBufferFill, clEnqueueFillBuffer)
2713  _PI_CL(piEnqueueMemImageRead, clEnqueueReadImage)
2714  _PI_CL(piEnqueueMemImageWrite, clEnqueueWriteImage)
2715  _PI_CL(piEnqueueMemImageCopy, clEnqueueCopyImage)
2716  _PI_CL(piEnqueueMemImageFill, clEnqueueFillImage)
2718  _PI_CL(piEnqueueMemUnmap, clEnqueueUnmapMemObject)
2719  // USM
2734  // Device global variable
2739  // Host Pipe
2742 
2743  // command-buffer
2759 
2766 
2767 #undef _PI_CL
2768 
2769  return PI_SUCCESS;
2770 }
2771 
2772 #ifdef _WIN32
2773 #define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll"
2774 #include "../common_win_pi_trace/common_win_pi_trace.hpp"
2775 #undef __SYCL_PLUGIN_DLL_NAME
2776 #endif
2777 
2778 } // end extern 'C'
piContextCreate
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data1), void *user_data, pi_context *retcontext)
Definition: pi_opencl.cpp:1255
piEventGetProfilingInfo
pi_result piEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:567
_pi_mem
Definition: pi_cuda.hpp:56
ExtFuncPtrCacheT::clEnqueueMemcpyINTELCache
FuncPtrCache< clEnqueueMemcpyINTEL_fn > clEnqueueMemcpyINTELCache
Definition: pi_opencl.cpp:231
piEventRelease
pi_result piEventRelease(pi_event event)
Definition: pi_cuda.cpp:594
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:531
piextPlatformGetNativeHandle
pi_result piextPlatformGetNativeHandle(pi_platform platform, pi_native_handle *nativeHandle)
Gets the native handle of a PI platform object.
Definition: pi_opencl.cpp:2273
sycl::_V1::opencl::cl_int
std::int32_t cl_int
Definition: aliases.hpp:134
piMemGetInfo
pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:212
piextProgramGetNativeHandle
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_opencl.cpp:2299
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:455
sycl::_V1::opencl::cl_uint
std::uint32_t cl_uint
Definition: aliases.hpp:135
clGetMemAllocInfoName
CONSTFIX char clGetMemAllocInfoName[]
Definition: pi_opencl.cpp:67
piextKernelSetArgMemObj
pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem_obj_property *arg_properties, const pi_mem *arg_value)
Definition: pi_opencl.cpp:1141
ExtFuncPtrCacheT::clHostMemAllocINTELCache
FuncPtrCache< clHostMemAllocINTEL_fn > clHostMemAllocINTELCache
Definition: pi_opencl.cpp:221
_pi_context_info
_pi_context_info
Definition: pi.h:448
pi_buff_rect_offset_struct
Definition: pi.h:1016
pi.h
_pi_platform_backend
_pi_platform_backend
Definition: pi.h:264
piKernelSetArg
pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Definition: pi_cuda.cpp:341
clSetProgramSpecializationConstant_fn
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value) clSetProgramSpecializationConstant_fn
Definition: pi_opencl.cpp:209
piPluginInit
pi_result piPluginInit(pi_plugin *PluginInit)
Definition: pi_opencl.cpp:2604
piextEnqueueDeviceGlobalVariableRead
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API reading data from a device global variable to host.
Definition: pi_opencl.cpp:2130
piextProgramCreateWithNativeHandle
pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, bool, pi_program *piProgram)
Creates PI program object from a native handle.
Definition: pi_opencl.cpp:1102
PI_DEVICE_INFO_GPU_EU_COUNT
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:376
piMemBufferCreate
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties)
Definition: pi_opencl.cpp:1314
pi_bool
pi_uint32 pi_bool
Definition: pi.h:196
piPlatformsGet
pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms)
Definition: pi_opencl.cpp:771
CHECK_ERR_SET_NULL_RET
#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr)
Definition: pi_opencl.cpp:36
piKernelGetGroupInfo
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:1433
piPlatformGetInfo
pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Definition: pi_opencl.cpp:786
ExtFuncPtrCacheT::clEnqueueWriteHostPipeINTELCache
FuncPtrCache< clEnqueueWriteHostPipeINTEL_fn > clEnqueueWriteHostPipeINTELCache
Definition: pi_opencl.cpp:237
piextEnqueueCommandBuffer
pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API to submit the command-buffer to queue for execution, returns an error if the command-buffer is no...
Definition: pi_opencl.cpp:2515
FuncPtrCache::Map
std::map< cl_context, T > Map
Definition: pi_opencl.cpp:212
PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT
@ PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT
Definition: pi.h:413
PI_QUEUE_FLAG_ON_DEVICE
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE
Definition: pi.h:735
PI_MEMORY_ORDER_ACQUIRE
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE
Definition: pi.h:671
piextGetDeviceFunctionPointer
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *func_name, pi_uint64 *function_pointer_ret)
Retrieves a device function pointer to a user-defined function.
Definition: pi_opencl.cpp:1191
clCreateBufferWithPropertiesName
CONSTFIX char clCreateBufferWithPropertiesName[]
Definition: pi_opencl.cpp:62
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
Definition: pi.h:461
PI_DEVICE_INFO_IMAGE_SRGB
@ PI_DEVICE_INFO_IMAGE_SRGB
Definition: pi.h:382
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_cuda.cpp:314
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_opencl.cpp:2236
_pi_plugin
Definition: pi.h:2694
sycl::_V1::opencl::cl_ulong
std::uint64_t cl_ulong
Definition: aliases.hpp:137
piextCommandBufferMemcpyUSM
pi_result piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a USM memcpy command to the command-buffer.
Definition: pi_opencl.cpp:2364
piDevicePartition
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Definition: pi_cuda.cpp:80
PI_KERNEL_COMPILE_NUM_SUB_GROUPS
@ PI_KERNEL_COMPILE_NUM_SUB_GROUPS
Definition: pi.h:510
is_in_separated_string
static bool is_in_separated_string(const std::string &str, char delimiter, const std::string &sub_str)
Definition: pi_opencl.cpp:1169
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:567
PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
Definition: pi.h:290
piEnqueueMemBufferCopy
pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:703
clSharedMemAllocName
CONSTFIX char clSharedMemAllocName[]
Definition: pi_opencl.cpp:60
_pi_plugin::PluginVersion
char PluginVersion[20]
Definition: pi.h:2704
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
Definition: pi.h:398
ExtFuncPtrCacheT::clEnqueueWriteGlobalVariableCache
FuncPtrCache< clEnqueueWriteGlobalVariable_fn > clEnqueueWriteGlobalVariableCache
Definition: pi_opencl.cpp:234
piDevicesGet
pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices)
Definition: pi_opencl.cpp:820
_pi_result
_pi_result
Definition: pi.h:205
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
piTearDown
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_opencl.cpp:2537
pi_context_properties
intptr_t pi_context_properties
Definition: pi.h:654
piEnqueueMemUnmap
pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, void *mapped_ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:750
PI_SAMPLER_INFO_FILTER_MODE
@ PI_SAMPLER_INFO_FILTER_MODE
Definition: pi.h:635
piextEnqueueReadHostPipe
pi_result piextEnqueueReadHostPipe(pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Plugin.
Definition: pi_opencl.cpp:2155
sycl::_V1::ext::oneapi::experimental::alignment
constexpr alignment_key::value_t< K > alignment
Definition: properties.hpp:65
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
Definition: pi.h:739
piextKernelGetNativeHandle
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_opencl.cpp:2304
piSamplerGetInfo
pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:618
piEventCreate
pi_result piEventCreate(pi_context context, pi_event *ret_event)
Create PI event object in a signalled/completed state.
Definition: pi_opencl.cpp:1538
piextUSMEnqueueMemset
pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, size_t count, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memset API.
Definition: pi_opencl.cpp:1770
PI_DEVICE_INFO_GPU_SLICES
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:378
OCLV::OpenCLVersion::isValid
bool isValid() const
Definition: pi_opencl.hpp:90
piextUSMDeviceAlloc
pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates device memory.
Definition: pi_opencl.cpp:1632
piDeviceRetain
pi_result piDeviceRetain(pi_device device)
Definition: pi_cuda.cpp:65
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
_pi_device_type
_pi_device_type
Definition: pi.h:253
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_cuda.cpp:177
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_opencl.cpp:1390
CONSTFIX
#define CONSTFIX
Definition: pi_opencl.cpp:54
piMemBufferPartition
pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem)
Definition: pi_opencl.cpp:1355
PI_MEMORY_SCOPE_WORK_ITEM
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM
Definition: pi.h:677
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:380
piextUSMEnqueueMemAdvise
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event)
USM Memadvise API.
Definition: pi_opencl.cpp:1898
piextMemCreateWithNativeHandle
pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *piMem)
Creates PI mem object from a native handle.
Definition: pi_opencl.cpp:1367
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:456
ExtFuncPtrCacheT::clSetProgramSpecializationConstantCache
FuncPtrCache< clSetProgramSpecializationConstant_fn > clSetProgramSpecializationConstantCache
Definition: pi_opencl.cpp:239
piextEnqueueDeviceGlobalVariableWrite
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
API for writing data from host to a device global variable.
Definition: pi_opencl.cpp:2091
piGetDeviceAndHostTimer
pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime)
Queries device for its global timestamp in nanoseconds, and updates HostTime with the value of the h...
Definition: pi_opencl.cpp:2544
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
Indicates that the kernel might access data through USM pointers.
Definition: pi.h:1532
piextUSMHostAlloc
pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates host memory accessible by the device.
Definition: pi_opencl.cpp:1595
piextEventCreateWithNativeHandle
pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_event *piEvent)
Creates PI event object from a native handle.
Definition: pi_opencl.cpp:1551
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
_pi_platform
Definition: pi_cuda.hpp:44
pi_ext_command_buffer_desc
Definition: pi.h:2266
piEnqueueMemImageFill
pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const void *fill_color, const size_t *origin, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:801
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
@ PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
Definition: pi.h:362
OCLV::V2_1
const OpenCLVersion V2_1(2, 1)
_pi_sampler_addressing_mode
_pi_sampler_addressing_mode
Definition: pi.h:641
piKernelRelease
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_cuda.cpp:525
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:907
getDeviceVersion
static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version)
Definition: pi_opencl.cpp:150
ExtFuncPtrCache
static ExtFuncPtrCacheT * ExtFuncPtrCache
Definition: pi_opencl.cpp:245
piextCommandBufferMemBufferCopyRect
pi_result piextCommandBufferMemBufferCopyRect(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer copy command to the command-buffer.
Definition: pi_opencl.cpp:2401
cast
To cast(From value)
Definition: pi_opencl.cpp:44
_pi_device_info
_pi_device_info
Definition: pi.h:285
SupportedVersion
const char SupportedVersion[]
Definition: pi_opencl.cpp:2602
piextUSMRelease
pi_result piextUSMRelease(const void *ptr, pi_context context)
Release host system memory from USM.
Definition: pi_opencl.cpp:2072
MaxMessageSize
constexpr size_t MaxMessageSize
Definition: pi_opencl.cpp:83
piEventSetStatus
pi_result piEventSetStatus(pi_event event, pi_int32 execution_status)
Definition: pi_cuda.cpp:588
piextContextGetNativeHandle
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
Definition: pi_opencl.cpp:2283
ExtFuncPtrCacheT
Definition: pi_opencl.cpp:220
sycl::_V1::ext::intel::host_ptr
multi_ptr< ElementType, access::address_space::ext_intel_global_host_space, IsDecorated > host_ptr
Definition: usm_pointers.hpp:33
ExtFuncPtrCacheT::clSetKernelArgMemPointerINTELCache
FuncPtrCache< clSetKernelArgMemPointerINTEL_fn > clSetKernelArgMemPointerINTELCache
Definition: pi_opencl.cpp:229
PI_SAMPLER_FILTER_MODE_NEAREST
@ PI_SAMPLER_FILTER_MODE_NEAREST
Definition: pi.h:650
_pi_kernel
Definition: pi_cuda.hpp:72
piextCommandBufferRetain
pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer)
API to increment the reference count of the command-buffer.
Definition: pi_opencl.cpp:2322
piQueueCreate
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Definition: pi_opencl.cpp:929
piextContextCreateWithNativeHandle
pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_uint32 num_devices, const pi_device *devices, bool ownNativeHandle, pi_context *piContext)
Creates PI context object from a native handle.
Definition: pi_opencl.cpp:1270
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:266
clEnqueueReadGlobalVariableName
CONSTFIX char clEnqueueReadGlobalVariableName[]
Definition: pi_opencl.cpp:74
_pi_plugin::PiVersion
char PiVersion[20]
Definition: pi.h:2702
piextDeviceCreateWithNativeHandle
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform, pi_device *piDevice)
Creates PI device object from a native handle.
Definition: pi_opencl.cpp:908
ExtFuncPtrCacheT::clGetMemAllocInfoINTELCache
FuncPtrCache< clGetMemAllocInfoINTEL_fn > clGetMemAllocInfoINTELCache
Definition: pi_opencl.cpp:232
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:462
_pi_ext_command_buffer
Definition: pi_cuda.hpp:80
_pi_sampler_filter_mode
_pi_sampler_filter_mode
Definition: pi.h:649
pi_opencl.hpp
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
Definition: pi.h:397
_pi_queue_info
_pi_queue_info
Definition: pi.h:465
_pi_buffer_create_type
_pi_buffer_create_type
Definition: pi.h:625
FuncPtrCache::Mutex
std::mutex Mutex
Definition: pi_opencl.cpp:213
clMemBlockingFreeName
CONSTFIX char clMemBlockingFreeName[]
Definition: pi_opencl.cpp:61
clGetDeviceFunctionPointerName
CONSTFIX char clGetDeviceFunctionPointerName[]
Definition: pi_opencl.cpp:70
ExtFuncPtrCacheT::clSharedMemAllocINTELCache
FuncPtrCache< clSharedMemAllocINTEL_fn > clSharedMemAllocINTELCache
Definition: pi_opencl.cpp:223
piEnqueueMemBufferMap
pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, pi_map_flags map_flags, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map)
Definition: pi_opencl.cpp:1567
piextPlatformCreateWithNativeHandle
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform *platform)
Creates PI platform object from a native handle.
Definition: pi_opencl.cpp:812
piextCommandBufferNDRangeKernel
pi_result piextCommandBufferNDRangeKernel(pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a kernel execution command to the command-buffer.
Definition: pi_opencl.cpp:2343
ExtFuncPtrCacheT::clEnqueueReadHostPipeINTELCache
FuncPtrCache< clEnqueueReadHostPipeINTEL_fn > clEnqueueReadHostPipeINTELCache
Definition: pi_opencl.cpp:236
PI_MEMORY_ORDER_RELAXED
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED
Definition: pi.h:670
FuncPtrCache
Definition: pi_opencl.cpp:211
piextUSMEnqueuePrefetch
pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Hint to migrate memory to the device.
Definition: pi_opencl.cpp:1848
piEventsWait
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:575
PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS
@ PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS
Definition: pi.h:401
piContextRelease
pi_result piContextRelease(pi_context context)
Definition: pi_cuda.cpp:152
_pi_queue
Definition: pi_cuda.hpp:60
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:318
checkDeviceExtensions
static cl_int checkDeviceExtensions(cl_device_id dev, const std::vector< std::string > &exts, bool &supported)
Definition: pi_opencl.cpp:170
pi_uint32
uint32_t pi_uint32
Definition: pi.h:194
OCLV::V2_0
const OpenCLVersion V2_0(2, 0)
piEnqueueMemBufferReadRect
pi_result piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:663
pi_buff_rect_region_struct
Definition: pi.h:1025
piMemRelease
pi_result piMemRelease(pi_mem mem)
Definition: pi_cuda.cpp:220
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:959
piEnqueueMemBufferCopyRect
pi_result piEnqueueMemBufferCopyRect(pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:714
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Definition: pi_opencl.cpp:1284
piKernelGetSubGroupInfo
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information from the sub-group from a kernel.
Definition: pi_opencl.cpp:1453
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet)
Returns requested info for provided native device. Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_opencl.cpp:355
cl.h
_pi_kernel_exec_info
_pi_kernel_exec_info
Definition: pi.h:1530
PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
Definition: pi.h:737
piextCommandBufferCreate
pi_result piextCommandBufferCreate(pi_context context, pi_device device, const pi_ext_command_buffer_desc *desc, pi_ext_command_buffer *ret_command_buffer)
API to create a command-buffer.
Definition: pi_opencl.cpp:2310
piEnqueueMemBufferWrite
pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:677
syclcompat::local_id::x
size_t x()
Definition: id_query.hpp:54
PI_MEMORY_SCOPE_DEVICE
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE
Definition: pi.h:680
pi_mem_flags
pi_bitfield pi_mem_flags
Definition: pi.h:694
PI_MEMORY_SCOPE_SYSTEM
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM
Definition: pi.h:681
piextGetNativeHandle
static pi_result piextGetNativeHandle(void *piObj, pi_native_handle *nativeHandle)
Common API for getting the native handle of a PI object.
Definition: pi_opencl.cpp:2266
piEnqueueMemImageRead
pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, pi_bool blocking_read, pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:766
clDeviceMemAllocName
CONSTFIX char clDeviceMemAllocName[]
Definition: pi_opencl.cpp:59
ErrorMessageCode
thread_local pi_result ErrorMessageCode
Definition: pi_opencl.cpp:84
PI_KERNEL_GROUP_INFO_NUM_REGS
@ PI_KERNEL_GROUP_INFO_NUM_REGS
Definition: pi.h:494
PI_MEMORY_SCOPE_SUB_GROUP
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP
Definition: pi.h:678
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:976
PI_KERNEL_MAX_SUB_GROUP_SIZE
@ PI_KERNEL_MAX_SUB_GROUP_SIZE
Definition: pi.h:508
ErrorMessage
thread_local char ErrorMessage[MaxMessageSize]
Definition: pi_opencl.cpp:85
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_cuda.cpp:175
PI_MEMORY_ORDER_SEQ_CST
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST
Definition: pi.h:674
piEnqueueEventsWaitWithBarrier
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:642
piextMemImageCreateWithNativeHandle
pi_result piextMemImageCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, pi_mem *Img)
Creates PI image object from a native handle.
Definition: pi_opencl.cpp:1377
piKernelRetain
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_cuda.cpp:520
piEventSetCallback
pi_result piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, void(*pfn_notify)(pi_event event, pi_int32 event_command_status, void *user_data), void *user_data)
PI_MEMORY_ORDER_RELEASE
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE
Definition: pi.h:672
piEnqueueMemBufferFill
pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:727
_PI_OPENCL_PLUGIN_VERSION_STRING
#define _PI_OPENCL_PLUGIN_VERSION_STRING
Definition: pi_opencl.hpp:28
pi_ext_sync_point
pi_uint32 pi_ext_sync_point
Definition: pi.h:2260
piextUSMImport
pi_result piextUSMImport(const void *ptr, size_t size, pi_context context)
Import host system memory into USM.
Definition: pi_opencl.cpp:2065
piSamplerRetain
pi_result piSamplerRetain(pi_sampler sampler)
Definition: pi_cuda.cpp:626
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
Definition: pi.h:458
piMemImageGetInfo
pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:758
piextUSMEnqueueMemcpy2D
pi_result piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, const void *src_ptr, size_t src_pitch, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memcpy API.
Definition: pi_opencl.cpp:2010
piextEventGetNativeHandle
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_cuda.cpp:598
OCLV::OpenCLVersion
Definition: pi_opencl.hpp:32
pi_uint64
uint64_t pi_uint64
Definition: pi.h:195
_pi_event_info
_pi_event_info
Definition: pi.h:514
pi_memory_scope_capabilities
pi_bitfield pi_memory_scope_capabilities
Definition: pi.h:676
_pi_device_binary_property_struct
Definition: pi.h:855
ExtFuncPtrCacheT::clGetDeviceFunctionPointerCache
FuncPtrCache< clGetDeviceFunctionPointer_fn > clGetDeviceFunctionPointerCache
Definition: pi_opencl.cpp:224
pi_mem_properties
pi_bitfield pi_mem_properties
Definition: pi.h:710
_pi_program
Definition: pi_cuda.hpp:68
_pi_sampler
Definition: pi_cuda.hpp:76
sycl::_V1::access::target::device
@ device
PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
Definition: pi.h:474
piextUSMEnqueueMemset2D
pi_result piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, size_t pitch, int value, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Memset API.
Definition: pi_opencl.cpp:1980
PI_QUEUE_FLAG_ON_DEVICE_DEFAULT
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT
Definition: pi.h:736
sycl::_V1::info::device_type
device_type
Definition: info_desc.hpp:54
piDeviceRelease
pi_result piDeviceRelease(pi_device device)
Definition: pi_cuda.cpp:69
PI_DEVICE_INFO_UUID
@ PI_DEVICE_INFO_UUID
Definition: pi.h:371
clGetDeviceFunctionPointer_fn
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, const char *FuncName, cl_ulong *ret_ptr) clGetDeviceFunctionPointer_fn
Definition: pi_opencl.cpp:195
piextCommandBufferRelease
pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer)
API to decrement the reference count of the command-buffer.
Definition: pi_opencl.cpp:2329
PI_KERNEL_MAX_NUM_SUB_GROUPS
@ PI_KERNEL_MAX_NUM_SUB_GROUPS
Definition: pi.h:509
piextUSMSharedAlloc
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment)
Allocates memory accessible on both host and device.
Definition: pi_opencl.cpp:1671
PI_DEVICE_INFO_PCI_ADDRESS
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:375
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_opencl.cpp:837
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:198
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_opencl.cpp:2224
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_cuda.cpp:181
piContextRetain
pi_result piContextRetain(pi_context context)
Definition: pi_cuda.cpp:147
piPluginGetLastError
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
Definition: pi_opencl.cpp:96
pi_sampler_properties
pi_bitfield pi_sampler_properties
Definition: pi.h:662
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_opencl.cpp:1012
_pi_image_format
Definition: pi.h:1096
clEnqueueWriteGlobalVariableName
CONSTFIX char clEnqueueWriteGlobalVariableName[]
Definition: pi_opencl.cpp:72
ExtFuncPtrCacheT::clDeviceMemAllocINTELCache
FuncPtrCache< clDeviceMemAllocINTEL_fn > clDeviceMemAllocINTELCache
Definition: pi_opencl.cpp:222
clHostMemAllocName
CONSTFIX char clHostMemAllocName[]
Definition: pi_opencl.cpp:58
clSetKernelArgMemPointerName
CONSTFIX char clSetKernelArgMemPointerName[]
Definition: pi_opencl.cpp:64
PI_MEMORY_SCOPE_WORK_GROUP
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP
Definition: pi.h:679
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:405
iostream_proxy.hpp
PI_PROGRAM_INFO_KERNEL_NAMES
@ PI_PROGRAM_INFO_KERNEL_NAMES
Definition: pi.h:445
setErrorMessage
static void setErrorMessage(const char *message, pi_result error_code)
Definition: pi_opencl.cpp:88
piextUSMEnqueueFill2D
pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, size_t pitch, size_t pattern_size, const void *pattern, size_t width, size_t height, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM 2D Fill API.
Definition: pi_opencl.cpp:1949
piextQueueGetNativeHandle
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc)
Gets the native handle of a PI queue object.
Definition: pi_opencl.cpp:2288
USMSetIndirectAccess
static pi_result USMSetIndirectAccess(pi_kernel kernel)
Enables indirect access of pointers in kernels.
Definition: pi_opencl.cpp:310
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
Definition: pi.h:738
_PI_PLUGIN_VERSION_CHECK
#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION)
Definition: pi.h:171
piEnqueueMemBufferRead
pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:651
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:384
PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
@ PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
Definition: pi.h:289
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:360
piEnqueueEventsWait
pi_result piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:634
piMemRetain
pi_result piMemRetain(pi_mem mem)
Definition: pi_cuda.cpp:218
piextCommandBufferMemBufferRead
pi_result piextCommandBufferMemBufferRead(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer read command to the command-buffer.
Definition: pi_opencl.cpp:2427
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
Definition: pi.h:381
PI_MEMORY_ORDER_ACQ_REL
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL
Definition: pi.h:673
piextUSMGetMemAllocInfo
pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, pi_mem_alloc_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
API to query information about USM allocated pointers. Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
Definition: pi_opencl.cpp:2045
pi_queue_properties
pi_bitfield pi_queue_properties
Definition: pi.h:729
PI_DEVICE_INFO_ATOMIC_64
@ PI_DEVICE_INFO_ATOMIC_64
Definition: pi.h:395
_pi_platform_info
_pi_platform_info
Definition: pi.h:220
PI_DEVICE_INFO_BACKEND_VERSION
@ PI_DEVICE_INFO_BACKEND_VERSION
Definition: pi.h:399
piEventRetain
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:592
_pi_image_desc
Definition: pi.h:1101
clEnqueueReadGlobalVariable_fn
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *) clEnqueueReadGlobalVariable_fn
Definition: pi_opencl.cpp:205
PI_EXT_PLATFORM_INFO_BACKEND
@ PI_EXT_PLATFORM_INFO_BACKEND
Definition: pi.h:226
_pi_event
Definition: pi_cuda.hpp:64
piextMemGetNativeHandle
pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle)
Gets the native handle of a PI mem object.
Definition: pi_opencl.cpp:2295
piextQueueCreateWithNativeHandle
pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, int32_t NativeHandleDesc, pi_context, pi_device, bool ownNativeHandle, pi_queue_properties *Properties, pi_queue *piQueue)
Creates PI queue object from a native handle.
Definition: pi_opencl.cpp:998
PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
Definition: pi.h:379
piextCommandBufferMemBufferWrite
pi_result piextCommandBufferMemBufferWrite(pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer write command to the command-buffer.
Definition: pi_opencl.cpp:2471
getPlatformVersion
static cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version)
Definition: pi_opencl.cpp:128
clEnqueueWriteHostPipeName
CONSTFIX char clEnqueueWriteHostPipeName[]
Definition: pi_opencl.cpp:78
PI_EXT_PLATFORM_BACKEND_OPENCL
@ PI_EXT_PLATFORM_BACKEND_OPENCL
The backend is OpenCL.
Definition: pi.h:267
piextDeviceGetNativeHandle
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
Definition: pi_opencl.cpp:2278
piPluginGetBackendOption
pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, const char **backend_option)
API to get backend specific option.
Definition: pi_opencl.cpp:102
piEnqueueMemBufferWriteRect
pi_result piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:689
piSamplerCreate
pi_result piSamplerCreate(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler)
Definition: pi_opencl.cpp:1110
PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
@ PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
Definition: pi.h:394
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:304
piextKernelCreateWithNativeHandle
pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context, pi_program, bool, pi_kernel *piKernel)
Creates PI kernel object from a native handle.
Definition: pi_opencl.cpp:1157
sycl::_V1::opencl::cl_bool
bool cl_bool
Definition: aliases.hpp:129
clEnqueueWriteGlobalVariable_fn
CL_API_ENTRY cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) clEnqueueWriteGlobalVariable_fn
Definition: pi_opencl.cpp:200
clEnqueueMemFillName
CONSTFIX char clEnqueueMemFillName[]
Definition: pi_opencl.cpp:65
PI_DEVICE_INFO_VENDOR_ID
@ PI_DEVICE_INFO_VENDOR_ID
Definition: pi.h:287
piextEnqueueWriteHostPipe
pi_result piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
Write to pipe of a given name.
Definition: pi_opencl.cpp:2184
piMemImageCreate
pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, const pi_image_format *image_format, const pi_image_desc *image_desc, void *host_ptr, pi_mem *ret_mem)
Definition: pi_opencl.cpp:1341
sycl::_V1::image_format
image_format
Definition: image.hpp:94
piQueueFlush
pi_result piQueueFlush(pi_queue command_queue)
Definition: pi_cuda.cpp:183
_pi_mem_obj_property
Definition: pi.h:1828
_pi_usm_migration_flags
_pi_usm_migration_flags
Definition: pi.h:1886
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
Definition: pi.h:902
PI_QUEUE_FLAG_PROFILING_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
Definition: pi.h:734
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:733
piextUSMEnqueueMemcpy
pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, const void *src_ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event)
USM Memcpy API.
Definition: pi_opencl.cpp:1809
clSetProgramSpecializationConstantName
CONSTFIX char clSetProgramSpecializationConstantName[]
Definition: pi_opencl.cpp:68
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:906
piextCommandBufferFinalize
pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer)
API to stop command-buffer recording such that no more commands can be appended, and makes the comman...
Definition: pi_opencl.cpp:2336
piextCommandBufferMemBufferWriteRect
pi_result piextCommandBufferMemBufferWriteRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer write command to the command-buffer.
Definition: pi_opencl.cpp:2489
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
Definition: pi.h:408
PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT
@ PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT
Definition: pi.h:410
_PI_CL
#define _PI_CL(pi_api, ocl_api)
ExtFuncPtrCacheT::clEnqueueReadGlobalVariableCache
FuncPtrCache< clEnqueueReadGlobalVariable_fn > clEnqueueReadGlobalVariableCache
Definition: pi_opencl.cpp:235
piEnqueueMemImageCopy
pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_mem dst_image, pi_image_offset src_origin, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:792
_pi_kernel_group_info
_pi_kernel_group_info
Definition: pi.h:486
PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL
@ PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL
Definition: pi.h:364
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:1406
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:396
OCLV::V3_0
const OpenCLVersion V3_0(3, 0)
piextKernelSetArgSampler
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, const pi_sampler *arg_value)
Definition: pi_opencl.cpp:1150
piextKernelSetArgPointer
pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, const void *arg_value)
Sets up pointer arguments for CL kernels.
Definition: pi_opencl.cpp:1729
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
@ PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES
Definition: pi.h:457
ExtFuncPtrCacheT::clEnqueueMemFillINTELCache
FuncPtrCache< clEnqueueMemFillINTEL_fn > clEnqueueMemFillINTELCache
Definition: pi_opencl.cpp:230
ExtFuncPtrCacheT::clCreateBufferWithPropertiesINTELCache
FuncPtrCache< clCreateBufferWithPropertiesINTEL_fn > clCreateBufferWithPropertiesINTELCache
Definition: pi_opencl.cpp:226
piSamplerRelease
pi_result piSamplerRelease(pi_sampler sampler)
Definition: pi_cuda.cpp:630
piEventGetInfo
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_opencl.cpp:2584
pi_memory_order_capabilities
pi_bitfield pi_memory_order_capabilities
Definition: pi.h:669
PI_SAMPLER_INFO_NORMALIZED_COORDS
@ PI_SAMPLER_INFO_NORMALIZED_COORDS
Definition: pi.h:633
_pi_mem_alloc_info
_pi_mem_alloc_info
Definition: pi.h:1869
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:627
piEnqueueMemImageWrite
pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, pi_bool blocking_write, pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:778
piextCommandBufferMemBufferReadRect
pi_result piextCommandBufferMemBufferReadRect(pi_ext_command_buffer command_buffer, pi_mem buffer, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a rectangular mem buffer read command to the command-buffer.
Definition: pi_opencl.cpp:2445
pi_usm_mem_properties
pi_bitfield pi_usm_mem_properties
Definition: pi.h:716
pi_map_flags
pi_bitfield pi_map_flags
Definition: pi.h:704
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:905
PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
Definition: pi.h:377
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_opencl.cpp:1424
clEnqueueReadHostPipeName
CONSTFIX char clEnqueueReadHostPipeName[]
Definition: pi_opencl.cpp:77
getExtFuncFromContext
static pi_result getExtFuncFromContext(cl_context context, FuncPtrCache< T > &FPtrCache, const char *FuncName, T *fptr)
Definition: pi_opencl.cpp:249
ExtFuncPtrCacheT::clMemBlockingFreeINTELCache
FuncPtrCache< clMemBlockingFreeINTEL_fn > clMemBlockingFreeINTELCache
Definition: pi_opencl.cpp:227
PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
@ PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
Definition: pi.h:511
piextQueueCreate
pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue)
Definition: pi_opencl.cpp:915
clEnqueueMemcpyName
CONSTFIX char clEnqueueMemcpyName[]
Definition: pi_opencl.cpp:66
sycl::_V1::image_target
image_target
Definition: access.hpp:74
piextUSMFree
pi_result piextUSMFree(pi_context context, void *ptr)
Frees allocated USM memory in a blocking manner.
Definition: pi_opencl.cpp:1704
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
Definition: pi.h:460
pi_int32
int32_t pi_int32
Definition: pi.h:193
_pi_context
Definition: pi_cuda.hpp:52
PI_QUEUE_FLAGS
constexpr pi_queue_properties PI_QUEUE_FLAGS
Definition: pi.h:730
piextCommandBufferMemBufferCopy
pi_result piextCommandBufferMemBufferCopy(pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point)
API to append a mem buffer copy command to the command-buffer.
Definition: pi_opencl.cpp:2381
_pi_device
Definition: pi_cuda.hpp:48
_pi_kernel_sub_group_info
_pi_kernel_sub_group_info
Definition: pi.h:507
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
@ PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES
Definition: pi.h:409
PI_SAMPLER_INFO_ADDRESSING_MODE
@ PI_SAMPLER_INFO_ADDRESSING_MODE
Definition: pi.h:634
PI_SAMPLER_ADDRESSING_MODE_CLAMP
@ PI_SAMPLER_ADDRESSING_MODE_CLAMP
Definition: pi.h:645