DPC++ Runtime
Runtime libraries for oneAPI DPC++
pi_esimd_emulator.cpp
Go to the documentation of this file.
1 //===---------- pi_esimd_emulator.cpp - CM Emulation Plugin ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
14 
15 #include <stdint.h>
16 
17 #include <detail/accessor_impl.hpp>
18 #include <sycl/backend_types.hpp>
19 #include <sycl/detail/common.hpp>
20 #include <sycl/detail/export.hpp>
21 #include <sycl/detail/helpers.hpp>
25 #include <sycl/ext/intel/esimd/common.hpp> // SLM_BTI
26 #include <sycl/group.hpp>
27 #include <sycl/id.hpp>
28 #include <sycl/kernel.hpp>
29 #include <sycl/nd_item.hpp>
30 #include <sycl/range.hpp>
31 
32 #include <esimdemu_support.h>
33 
34 #include <cstdarg>
35 #include <cstdio>
36 #include <cstring>
37 #include <functional>
38 #include <iostream>
39 #include <map>
40 #include <memory>
41 #include <string>
42 #include <thread>
43 #include <utility>
44 
45 #include "pi_esimd_emulator.hpp"
46 
47 #define ARG_UNUSED(x) (void)x
48 
49 namespace {
50 
51 // Helper functions for unified 'Return' type declaration - imported
52 // from pi_level_zero.cpp
53 template <typename T, typename Assign>
54 pi_result getInfoImpl(size_t ParamValueSize, void *ParamValue,
55  size_t *ParamValueSizeRet, T Value, size_t ValueSize,
56  Assign &&AssignFunc) {
57  if (ParamValue != nullptr) {
58  if (ParamValueSize < ValueSize) {
59  return PI_ERROR_INVALID_VALUE;
60  }
61  AssignFunc(ParamValue, Value, ValueSize);
62  }
63  if (ParamValueSizeRet != nullptr) {
64  *ParamValueSizeRet = ValueSize;
65  }
66  return PI_SUCCESS;
67 }
68 
69 template <typename T>
70 pi_result getInfo(size_t ParamValueSize, void *ParamValue,
71  size_t *ParamValueSizeRet, T Value) {
72  auto assignment = [](void *ParamValue, T Value, size_t ValueSize) {
73  ARG_UNUSED(ValueSize);
74  *static_cast<T *>(ParamValue) = Value;
75  };
76  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
77  sizeof(T), assignment);
78 }
79 
80 template <typename T>
81 pi_result getInfoArray(size_t ArrayLength, size_t ParamValueSize,
82  void *ParamValue, size_t *ParamValueSizeRet, T *Value) {
83  return getInfoImpl(ParamValueSize, ParamValue, ParamValueSizeRet, Value,
84  ArrayLength * sizeof(T), memcpy);
85 }
86 
87 template <>
88 pi_result getInfo<const char *>(size_t ParamValueSize, void *ParamValue,
89  size_t *ParamValueSizeRet, const char *Value) {
90  return getInfoArray(strlen(Value) + 1, ParamValueSize, ParamValue,
91  ParamValueSizeRet, Value);
92 }
93 
94 class ReturnHelper {
95 public:
96  ReturnHelper(size_t ArgParamValueSize, void *ArgParamValue,
97  size_t *ArgParamValueSizeRet)
98  : ParamValueSize(ArgParamValueSize), ParamValue(ArgParamValue),
99  ParamValueSizeRet(ArgParamValueSizeRet) {}
100 
101  template <class T> pi_result operator()(const T &t) {
102  return getInfo(ParamValueSize, ParamValue, ParamValueSizeRet, t);
103  }
104 
105 private:
106  size_t ParamValueSize;
107  void *ParamValue;
108  size_t *ParamValueSizeRet;
109 };
110 
111 } // anonymous namespace
112 
113 // Controls PI level tracing prints.
114 static bool PrintPiTrace = false;
115 
116 static void PiTrace(std::string TraceString) {
117  if (PrintPiTrace) {
118  std::cout << TraceString << std::endl;
119  }
120 }
121 
122 // Global variables used in PI_esimd_emulator
123 // Note we only create a simple pointer variables such that C++ RT won't
124 // deallocate them automatically at the end of the main program.
125 // The heap memory allocated for this global variable reclaimed only when
126 // Sycl RT calls piTearDown().
127 static sycl::detail::ESIMDEmuPluginOpaqueData *PiESimdDeviceAccess;
128 
129 // Single-entry cache for piPlatformsGet call.
131 // TODO/FIXME : Memory leak. Handle with 'piTearDown'.
132 static std::mutex *PiPlatformCacheLock = new std::mutex;
133 
134 // Mapping between surface index and CM-managed surface
135 static std::unordered_map<unsigned int, _pi_mem *> *PiESimdSurfaceMap =
136  new std::unordered_map<unsigned int, _pi_mem *>;
137 // TODO/FIXME : Memory leak. Handle with 'piTearDown'.
138 static std::mutex *PiESimdSurfaceMapLock = new std::mutex;
139 
140 // To be compared with ESIMD_EMULATOR_PLUGIN_OPAQUE_DATA_VERSION in device
141 // interface header file
142 #define ESIMDEmuPluginDataVersion 0
143 
144 // To be compared with ESIMD_DEVICE_INTERFACE_VERSION in device
145 // interface header file
146 #define ESIMDEmuPluginInterfaceVersion 1
147 
148 // For PI_DEVICE_INFO_DRIVER_VERSION info
149 static char ESimdEmuVersionString[32];
150 
151 // Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
152 constexpr size_t MaxMessageSize = 256;
153 thread_local pi_result ErrorMessageCode = PI_SUCCESS;
154 thread_local char ErrorMessage[MaxMessageSize];
155 
156 // Utility function for setting a message and warning
157 [[maybe_unused]] static void setErrorMessage(const char *message,
158  pi_result error_code) {
159  assert(strlen(message) <= MaxMessageSize);
160  strcpy(ErrorMessage, message);
161  ErrorMessageCode = error_code;
162 }
163 
164 // Returns plugin specific error and warning messages
166  *message = &ErrorMessage[0];
167  return ErrorMessageCode;
168 }
169 
170 using IDBuilder = sycl::detail::Builder;
171 
172 template <int NDims>
173 using KernelFunc = std::function<void(const sycl::nd_item<NDims> &)>;
174 
175 // Struct to wrap dimension info and lambda function to be invoked by
176 // CM Kernel launcher that only accepts raw function pointer for
177 // kernel execution. Function instances of 'InvokeKernel' un-wrap
178 // this struct instance and invoke lambda function ('Func')
179 template <int NDims> struct KernelInvocationContext {
181  const sycl::range<NDims> &LocalSize;
182  const sycl::range<NDims> &GlobalSize;
183  const sycl::id<NDims> &GlobalOffset;
184 };
185 
186 // A helper structure to create multi-dimensional range when
187 // dimensionality is given as a template parameter. `create` function
188 // in specializations accepts a template `Gen` function which
189 // generates range extent for a dimension given as an argument.
190 template <int NDims> struct RangeBuilder;
191 
192 template <> struct RangeBuilder<1> {
193  template <typename Gen> static sycl::range<1> create(Gen G) {
194  return sycl::range<1>{G(0)};
195  }
196 };
197 template <> struct RangeBuilder<2> {
198  template <typename Gen> static sycl::range<2> create(Gen G) {
199  return sycl::range<2>{G(0), G(1)};
200  }
201 };
202 template <> struct RangeBuilder<3> {
203  template <typename Gen> static sycl::range<3> create(Gen G) {
204  return sycl::range<3>{G(0), G(1), G(2)};
205  }
206 };
207 
208 // Function template to generate entry point of kernel execution as
209 // raw function pointer. CM kernel launcher executes one instance of
210 // this function per 'NDims'
211 template <int NDims> void InvokeKernel(KernelInvocationContext<NDims> *ctx) {
212 
213  sycl::range<NDims> GroupSize{
214  sycl::detail::InitializedVal<NDims, sycl::range>::template get<0>()};
215 
216  for (int i = 0; i < NDims; ++i) {
217  GroupSize[i] = ctx->GlobalSize[i] / ctx->LocalSize[i];
218  }
219 
220  const sycl::id<NDims> LocalID = RangeBuilder<NDims>::create(
221  [](int i) { return cm_support::get_thread_idx(i); });
222 
223  const sycl::id<NDims> GroupID = RangeBuilder<NDims>::create(
224  [](int i) { return cm_support::get_group_idx(i); });
225 
226  const sycl::group<NDims> Group = IDBuilder::createGroup<NDims>(
227  ctx->GlobalSize, ctx->LocalSize, GroupSize, GroupID);
228 
229  const sycl::id<NDims> GlobalID =
230  GroupID * ctx->LocalSize + LocalID + ctx->GlobalOffset;
231 
232  const sycl::item<NDims, /*Offset=*/true> GlobalItem =
233  IDBuilder::createItem<NDims, true>(ctx->GlobalSize, GlobalID,
234  ctx->GlobalOffset);
235 
236  const sycl::item<NDims, /*Offset=*/false> LocalItem =
237  IDBuilder::createItem<NDims, false>(ctx->LocalSize, LocalID);
238 
239  const sycl::nd_item<NDims> NDItem =
240  IDBuilder::createNDItem<NDims>(GlobalItem, LocalItem, Group);
241 
242  ctx->Func(NDItem);
243 }
244 
245 // Interface for lauching kernels using libcm from CM EMU project.
246 template <int DIMS> class libCMBatch {
247 private:
248  const KernelFunc<DIMS> &MKernel;
249  std::vector<uint32_t> GroupDim, SpaceDim;
250 
251 public:
253  : MKernel(Kernel), GroupDim{1, 1, 1}, SpaceDim{1, 1, 1} {}
254 
255  void runIterationSpace(const sycl::range<DIMS> &LocalSize,
256  const sycl::range<DIMS> &GlobalSize,
257  const sycl::id<DIMS> &GlobalOffset) {
258 
259  for (int I = 0; I < DIMS; I++) {
260  SpaceDim[I] = (uint32_t)LocalSize[I];
261  GroupDim[I] = (uint32_t)(GlobalSize[I] / LocalSize[I]);
262  }
263 
264  const auto InvokeKernelArg = KernelInvocationContext<DIMS>{
265  MKernel, LocalSize, GlobalSize, GlobalOffset};
266 
267  EsimdemuKernel{reinterpret_cast<fptrVoid>(InvokeKernel<DIMS>),
268  GroupDim.data(), SpaceDim.data()}
269  .launchMT(sizeof(InvokeKernelArg), &InvokeKernelArg);
270  }
271 };
272 
273 unsigned int sycl_get_cm_surface_index(void *PtrInput) {
274  _pi_mem *Surface = static_cast<_pi_mem *>(PtrInput);
275 
276  return Surface->SurfaceIndex;
277 }
278 
279 // Function to provide image info for kernel compilation using surface
280 // index without dependency on '_pi_image' definition
281 void sycl_get_cm_buffer_params(unsigned int IndexInput, char **BaseAddr,
282  uint32_t *Width, std::mutex **BufMtxLock) {
283  std::lock_guard<std::mutex> Lock{*PiESimdSurfaceMapLock};
284  auto MemIter = PiESimdSurfaceMap->find(IndexInput);
285 
286  assert(MemIter != PiESimdSurfaceMap->end() && "Invalid Surface Index");
287 
288  _pi_buffer *Buf = static_cast<_pi_buffer *>(MemIter->second);
289 
290  *BaseAddr = Buf->MapHostPtr;
291  *Width = static_cast<uint32_t>(Buf->Size);
292 
293  *BufMtxLock = &(Buf->SurfaceLock);
294 }
295 
296 // Function to provide image info for kernel compilation using surface
297 // index without dependency on '_pi_image' definition
298 void sycl_get_cm_image_params(unsigned int IndexInput, char **BaseAddr,
299  uint32_t *Width, uint32_t *Height, uint32_t *Bpp,
300  std::mutex **ImgMtxLock) {
301  std::lock_guard<std::mutex> Lock{*PiESimdSurfaceMapLock};
302  auto MemIter = PiESimdSurfaceMap->find(IndexInput);
303  assert(MemIter != PiESimdSurfaceMap->end() && "Invalid Surface Index");
304 
305  _pi_image *Img = static_cast<_pi_image *>(MemIter->second);
306 
307  *BaseAddr = Img->MapHostPtr;
308 
309  *Bpp = static_cast<uint32_t>(Img->BytesPerPixel);
310  *Width = static_cast<uint32_t>(Img->Width) * (*Bpp);
311  *Height = static_cast<uint32_t>(Img->Height);
312 
313  *ImgMtxLock = &(Img->SurfaceLock);
314 }
315 
318 sycl::detail::ESIMDDeviceInterface::ESIMDDeviceInterface() {
320  reserved = nullptr;
321 
322  /* From 'esimd_emulator_functions_v1.h' : Start */
323  cm_barrier_ptr = cm_support::barrier;
324  cm_sbarrier_ptr = cm_support::split_barrier;
325  cm_fence_ptr = cm_support::fence;
326 
327  sycl_get_surface_base_addr_ptr = cm_support::get_surface_base_addr;
328  __cm_emu_get_slm_ptr = cm_support::get_slm_base;
329  cm_slm_init_ptr = cm_support::init_slm;
330 
331  sycl_get_cm_surface_index_ptr = sycl_get_cm_surface_index;
332  sycl_get_cm_buffer_params_ptr = sycl_get_cm_buffer_params;
333  sycl_get_cm_image_params_ptr = sycl_get_cm_image_params;
334 
335  /* From 'esimd_emulator_functions_v1.h' : End */
336 }
337 
340 
341 static bool isNull(int NDims, const size_t *R) {
342  return ((0 == R[0]) && (NDims < 2 || 0 == R[1]) && (NDims < 3 || 0 == R[2]));
343 }
344 
345 // NDims is the number of dimensions in the ND-range. Kernels are
346 // normalized in the handler so that all kernels take an sycl::nd_item
347 // as argument (see StoreLambda in sycl/handler.hpp). For kernels
348 // whose workgroup size (LocalWorkSize) is unspecified, InvokeImpl
349 // sets LocalWorkSize to {1, 1, 1}, i.e. each workgroup contains just
350 // one work item. CM emulator will run several workgroups in parallel
351 // depending on environment settings.
352 
353 template <int NDims> struct InvokeImpl {
354 
355  static sycl::range<NDims> get_range(const size_t *Array) {
356  if constexpr (NDims == 1)
357  return sycl::range<NDims>{Array[0]};
358  else if constexpr (NDims == 2)
359  return sycl::range<NDims>{Array[0], Array[1]};
360  else if constexpr (NDims == 3)
361  return sycl::range<NDims>{Array[0], Array[1], Array[2]};
362  }
363 
364  static void invoke(pi_kernel Kernel, const size_t *GlobalWorkOffset,
365  const size_t *GlobalWorkSize,
366  const size_t *LocalWorkSize) {
367  libCMBatch<NDims>{*reinterpret_cast<KernelFunc<NDims> *>(Kernel)}
368  .runIterationSpace(get_range(LocalWorkSize), get_range(GlobalWorkSize),
369  sycl::id<NDims>{get_range(GlobalWorkOffset)});
370  }
371 };
372 
373 extern "C" {
374 
375 #define DIE_NO_IMPLEMENTATION \
376  if (PrintPiTrace) { \
377  std::cerr << "Not Implemented : " << __FUNCTION__ \
378  << " - File : " << __FILE__; \
379  std::cerr << " / Line : " << __LINE__ << std::endl; \
380  } \
381  return PI_ERROR_INVALID_OPERATION;
382 
383 #define CONTINUE_NO_IMPLEMENTATION \
384  if (PrintPiTrace) { \
385  std::cerr << "Warning : Not Implemented : " << __FUNCTION__ \
386  << " - File : " << __FILE__; \
387  std::cerr << " / Line : " << __LINE__ << std::endl; \
388  } \
389  return PI_SUCCESS;
390 
391 #define CASE_PI_UNSUPPORTED(not_supported) \
392  case not_supported: \
393  if (PrintPiTrace) { \
394  std::cerr << std::endl \
395  << "Unsupported PI case : " << #not_supported << " in " \
396  << __FUNCTION__ << ":" << __LINE__ << "(" << __FILE__ << ")" \
397  << std::endl; \
398  } \
399  return PI_ERROR_INVALID_OPERATION;
400 
402  pi_uint32 *NumPlatforms) {
403  static bool PiPlatformCachePopulated = false;
404  static const char *PiTraceEnv = std::getenv("SYCL_PI_TRACE");
405  static const int PiTraceValue = PiTraceEnv ? std::stoi(PiTraceEnv) : 0;
406 
407  if (PiTraceValue == -1) { // Means print all PI traces
408  PrintPiTrace = true;
409  }
410 
411  if (NumPlatforms) {
412  *NumPlatforms = 1;
413  }
414 
415  if (NumEntries == 0) {
417  if (Platforms != nullptr) {
418  PiTrace("Invalid Arguments for piPlatformsGet of "
419  "esimd_emulator (Platforms!=nullptr) "
420  "while querying number of platforms");
421  return PI_ERROR_INVALID_VALUE;
422  }
423  return PI_SUCCESS;
424  }
425 
426  if (Platforms == nullptr && NumPlatforms == nullptr) {
427  return PI_ERROR_INVALID_VALUE;
428  }
429 
430  std::lock_guard<std::mutex> Lock{*PiPlatformCacheLock};
433  PiPlatformCache->CmEmuVersion = std::string("0.0.1");
435  }
436 
437  if (Platforms && NumEntries > 0) {
438  *Platforms = PiPlatformCache;
439  }
440 
441  return PI_SUCCESS;
442 }
443 
445  size_t ParamValueSize, void *ParamValue,
446  size_t *ParamValueSizeRet) {
447  if (Platform == nullptr) {
448  return PI_ERROR_INVALID_PLATFORM;
449  }
450  ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
451 
452  switch (ParamName) {
454  return ReturnValue("Intel(R) ESIMD_EMULATOR/GPU");
455 
457  return ReturnValue("Intel(R) Corporation");
458 
460  return ReturnValue(Platform->CmEmuVersion.c_str());
461 
463  return ReturnValue("FULL_PROFILE");
464 
466  return ReturnValue("");
467 
468  default:
469  // TODO: implement other parameters
470  die("Unsupported ParamName in piPlatformGetInfo");
471  }
472 
473  return PI_SUCCESS;
474 }
475 
478 }
479 
482 }
483 
485  pi_uint32 NumEntries, pi_device *Devices,
486  pi_uint32 *NumDevices) {
487  if (Platform == nullptr) {
488  return PI_ERROR_INVALID_PLATFORM;
489  }
490 
491  pi_result Res = Platform->populateDeviceCacheIfNeeded();
492  if (Res != PI_SUCCESS) {
493  return Res;
494  }
495 
496  // CM has single-root-GPU-device without sub-device support.
497  pi_uint32 DeviceCount = (DeviceType & PI_DEVICE_TYPE_GPU) ? 1 : 0;
498 
499  if (NumDevices) {
500  *NumDevices = DeviceCount;
501  }
502 
503  if (NumEntries == 0) {
505  if (Devices != nullptr) {
506  PiTrace("Invalid Arguments for piDevicesGet of esimd_emultor "
507  "(Devices!=nullptr) while querying number of platforms");
508  return PI_ERROR_INVALID_VALUE;
509  }
510  return PI_SUCCESS;
511  }
512 
513  if (DeviceCount == 0) {
515  return PI_SUCCESS;
516  }
517 
518  if (Devices) {
519  *Devices = Platform->PiDeviceCache.get();
520  }
521  return PI_SUCCESS;
522 }
523 
524 // Check the device cache and load it if necessary.
526  std::lock_guard<std::mutex> Lock(PiDeviceCacheMutex);
527 
528  if (DeviceCachePopulated) {
529  return PI_SUCCESS;
530  }
531  cm_support::CmDevice *CmDevice = nullptr;
532  // TODO FIXME Implement proper version checking and reporting:
533  // - version passed to cm_support::CreateCmDevice
534  // - CmEmuVersion
535  // - PluginVersion
536  // - ESIMDEmuPluginOpaqueData::version
537  //
538  // PI_DEVICE_INFO_DRIVER_VERSION could report the ESIMDDeviceInterface
539  // version, PI_PLATFORM_INFO_VERSION - the underlying libCM library version.
540  unsigned int Version = 0;
541 
542  int Result = cm_support::CreateCmDevice(CmDevice, Version);
543 
544  if (Result != cm_support::CM_SUCCESS) {
545  return PI_ERROR_INVALID_DEVICE;
546  }
547 
548  // CM Device version info consists of two decimal numbers - major
549  // and minor. Minor is single-digit. Version info is encoded into a
550  // unsigned integer value = 100 * major + minor. Second from right
551  // digit in decimal must be zero as it is used as 'dot'
552  // REF - $CM_EMU/common/cm_version_defs.h - 'CURRENT_CM_VERSION'
553  // e.g. CM version 7.3 => Device version = 703
554 
555  if (((Version / 10) % 10) != 0) {
556  PiTrace("Invalid Arguments for piPlatformsGet of "
557  "esimd_emulator (Platforms!=nullptr) "
558  "while querying number of platforms");
559  return PI_ERROR_INVALID_DEVICE;
560  }
561 
562  std::ostringstream StrFormat;
563  StrFormat << (int)(Version / 100) << "." << (int)(Version % 10);
564 
565  std::unique_ptr<_pi_device> Device(
566  new _pi_device(this, CmDevice, StrFormat.str()));
567  PiDeviceCache = std::move(Device);
568  DeviceCachePopulated = true;
569  return PI_SUCCESS;
570 }
571 
573  if (Device == nullptr) {
574  return PI_ERROR_INVALID_DEVICE;
575  }
576 
577  // CM supports only single device, which is root-device. 'Retain' is
578  // No-op.
579  return PI_SUCCESS;
580 }
581 
583  if (Device == nullptr) {
584  return PI_ERROR_INVALID_DEVICE;
585  }
586 
587  // CM supports only single device, which is root-device. 'Release'
588  // is No-op.
589  return PI_SUCCESS;
590 }
591 
593  size_t ParamValueSize, void *ParamValue,
594  size_t *ParamValueSizeRet) {
595  ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
596 
597  switch (ParamName) {
598  case PI_DEVICE_INFO_TYPE:
599  return ReturnValue(PI_DEVICE_TYPE_GPU);
601  return ReturnValue(pi_device{0});
603  return ReturnValue(Device->Platform);
604  case PI_DEVICE_INFO_NAME:
605  return ReturnValue("ESIMD_EMULATOR");
607  return ReturnValue(pi_bool{true});
615  return ReturnValue(ESimdEmuVersionString);
617  return ReturnValue("Intel(R) Corporation");
619  return ReturnValue(size_t{8192});
621  return ReturnValue(size_t{8192});
623  return ReturnValue(pi_bool{1});
625  // TODO : Populate return string accordingly - e.g. cl_khr_fp16,
626  // cl_khr_fp64, cl_khr_int64_base_atomics,
627  // cl_khr_int64_extended_atomics
628  return ReturnValue("cl_khr_fp64");
630  return ReturnValue(Device->VersionStr.c_str());
631  case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: // emulator doesn't support partition
632  return ReturnValue(pi_bool{true});
634  return ReturnValue(pi_bool{false});
636  return ReturnValue(pi_bool{false});
638  return ReturnValue(pi_uint32{256});
640  return ReturnValue(pi_uint32{0});
642  return ReturnValue(pi_device_partition_property{0});
644  // '0x8086' : 'Intel HD graphics vendor ID'
645  return ReturnValue(pi_uint32{0x8086});
647  // Default SLM_MAX_SIZE from CM_EMU
648  return ReturnValue(pi_uint32{65536});
650  return ReturnValue(size_t{256});
652  // Imported from level_zero
653  return ReturnValue(pi_uint32{8});
657  // Default minimum values required by the SYCL specification.
658  return ReturnValue(size_t{2048});
660  return ReturnValue(pi_uint32{3});
662  return ReturnValue(pi_device_partition_property{0});
664  return ReturnValue("");
666  return ReturnValue(pi_queue_properties{PI_QUEUE_FLAG_ON_DEVICE});
668  struct {
669  size_t Arr[3];
670  } MaxGroupSize = {{256, 256, 1}};
671  return ReturnValue(MaxGroupSize);
672  }
687  return ReturnValue(pi_uint32{1});
688 
689  // Imported from level_zero
695  pi_uint64 Supported = 0;
696  // TODO[1.0]: how to query for USM support now?
697  if (true) {
698  // TODO: Use ze_memory_access_capabilities_t
699  Supported = PI_USM_ACCESS | PI_USM_ATOMIC_ACCESS |
701  }
702  return ReturnValue(Supported);
703  }
705  return ReturnValue(
706  pi_uint32{sizeof(void *) * std::numeric_limits<unsigned char>::digits});
708  return ReturnValue(pi_uint32{1000});
710  return ReturnValue(pi_bool{true});
712  return ReturnValue(pi_bool{true});
716  return ReturnValue(pi_uint32{0});
719  return ReturnValue(size_t{0x80000000});
722  return ReturnValue(size_t{0});
725  return ReturnValue(pi_uint32{16});
728  return ReturnValue(size_t{32});
733  uint64_t FPValue = PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT |
736  PI_FP_FMA;
737  return ReturnValue(pi_uint64{FPValue});
738  }
740  return ReturnValue(PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
742  // TODO : CHECK
743  return ReturnValue(pi_uint32{64});
745  // TODO : CHECK
746  return ReturnValue(pi_uint64{0});
748  // TODO : CHECK
749  return ReturnValue(pi_uint64{0});
751  // TODO : CHECK
752  return ReturnValue(pi_uint64{0});
754  // TODO : CHECK
755  return ReturnValue(pi_uint32{64});
757  // TODO : CHECK
758  return ReturnValue(PI_DEVICE_LOCAL_MEM_TYPE_LOCAL);
760  return ReturnValue(pi_bool{false});
762  // TODO : CHECK
763  return ReturnValue(size_t{0});
765  // TODO : CHECK
766  return ReturnValue("");
768  // TODO : CHECK
769  return ReturnValue(size_t{1024});
771  return ReturnValue(pi_bool{false});
773  return ReturnValue(pi_device_affinity_domain{0});
775  // TODO : CHECK
776  return ReturnValue(pi_uint64{0});
778  // TODO : CHECK
779  return ReturnValue(
782  return ReturnValue("FULL_PROFILE");
784  // TODO : CHECK
785  return ReturnValue(pi_uint32{0});
787  return ReturnValue(size_t{1});
789  return ReturnValue(pi_int32{1});
790 
794 
795  // Intel-specific extensions
811 
812  default:
814  }
815  return PI_SUCCESS;
816 }
817 
819  pi_uint32, pi_device *, pi_uint32 *) {
821 }
822 
825 }
826 
828  pi_device *) {
830 }
831 
833  pi_uint32 NumDevices, const pi_device *Devices,
834  void (*PFnNotify)(const char *ErrInfo,
835  const void *PrivateInfo, size_t CB,
836  void *UserData),
837  void *UserData, pi_context *RetContext) {
838  ARG_UNUSED(Properties);
839  ARG_UNUSED(PFnNotify);
840  ARG_UNUSED(UserData);
841 
842  if (NumDevices != 1) {
843  return PI_ERROR_INVALID_VALUE;
844  }
845  if (Devices == nullptr) {
846  return PI_ERROR_INVALID_DEVICE;
847  }
848  if (RetContext == nullptr) {
849  return PI_ERROR_INVALID_VALUE;
850  }
851 
852  try {
854  *RetContext = new _pi_context(Devices[0]);
855  } catch (const std::bad_alloc &) {
856  return PI_ERROR_OUT_OF_HOST_MEMORY;
857  } catch (...) {
858  return PI_ERROR_UNKNOWN;
859  }
860  return PI_SUCCESS;
861 }
862 
864  size_t *) {
866 }
867 
869  pi_context_extended_deleter, void *) {
871 }
872 
875 }
876 
878  const pi_device *, bool,
879  pi_context *) {
881 }
882 
884  if (Context == nullptr) {
885  return PI_ERROR_INVALID_CONTEXT;
886  }
887 
888  ++(Context->RefCount);
889 
890  return PI_SUCCESS;
891 }
892 
894  if (Context == nullptr || (Context->RefCount <= 0)) {
895  return PI_ERROR_INVALID_CONTEXT;
896  }
897 
898  if (--(Context->RefCount) == 0) {
901  std::lock_guard<std::mutex> Lock(Context->Addr2CmBufferSVMLock);
902  for (auto &Entry : Context->Addr2CmBufferSVM) {
903  Context->Device->CmDevicePtr->DestroyBufferSVM(Entry.second);
904  }
905  delete Context;
906  }
907 
908  return PI_SUCCESS;
909 }
910 
913  if (HostPtr == nullptr) {
914  PiTrace("HostPtr argument is required for "
915  "PI_MEM_FLAGS_HOST_PTR_USE/COPY");
916  return false;
917  }
918  // COPY and USE are mutually exclusive
921  PiTrace("PI_MEM_FLAGS_HOST_PTR_USE and _COPY cannot be used together");
922  return false;
923  }
924  }
925  return true;
926 }
927 
929  pi_queue_properties *Properties, pi_queue *Queue) {
930  assert(Properties);
931  // Expect flags mask to be passed first.
932  assert(Properties[0] == PI_QUEUE_FLAGS);
933  if (Properties[0] != PI_QUEUE_FLAGS)
934  return PI_ERROR_INVALID_VALUE;
935  pi_queue_properties Flags = Properties[1];
936  // Extra data isn't supported yet.
937  assert(Properties[2] == 0);
938  if (Properties[2] != 0)
939  return PI_ERROR_INVALID_VALUE;
940  return piQueueCreate(Context, Device, Flags, Queue);
941 }
943  pi_queue_properties Properties, pi_queue *Queue) {
945 
947  // TODO : Support Out-of-order Queue
948  *Queue = nullptr;
949  return PI_ERROR_INVALID_QUEUE_PROPERTIES;
950  }
951 
952  cm_support::CmQueue *CmQueue = nullptr;
953 
954  int Result = Context->Device->CmDevicePtr->CreateQueue(CmQueue);
955  if (Result != cm_support::CM_SUCCESS) {
956  return PI_ERROR_INVALID_CONTEXT;
957  }
958 
959  try {
960  *Queue = new _pi_queue(Context, CmQueue);
961  } catch (const std::bad_alloc &) {
962  return PI_ERROR_OUT_OF_HOST_MEMORY;
963  } catch (...) {
964  return PI_ERROR_UNKNOWN;
965  }
966 
967  return PI_SUCCESS;
968 }
969 
970 pi_result piQueueGetInfo(pi_queue, pi_queue_info, size_t, void *, size_t *) {
972 }
973 
975  if (Queue == nullptr) {
976  return PI_ERROR_INVALID_QUEUE;
977  }
978  ++(Queue->RefCount);
979  return PI_SUCCESS;
980 }
981 
983  if ((Queue == nullptr) || (Queue->CmQueuePtr == nullptr)) {
984  return PI_ERROR_INVALID_QUEUE;
985  }
986 
987  if (--(Queue->RefCount) == 0) {
988  // CM's 'DestoryQueue' is no-op
989  // Queue->Context->Device->CmDevicePTr->DestroyQueue(Queue->CmQueuePtr);
990  delete Queue;
991  }
992 
993  return PI_SUCCESS;
994 }
995 
997  // No-op as enqueued commands with ESIMD_EMULATOR plugin are blocking
998  // ones that do not return until their completion - kernel execution
999  // and memory read.
1001 }
1002 
1004  // No-op as enqueued commands with ESIMD_EMULATOR plugin are blocking
1005  // ones that do not return until their completion - kernel execution
1006  // and memory read.
1008 }
1009 
1012 }
1013 
1015  pi_device, bool, pi_queue *) {
1017 }
1018 
1020  void *HostPtr, pi_mem *RetMem,
1021  const pi_mem_properties *properties) {
1022  ARG_UNUSED(properties);
1023 
1024  if ((Flags & PI_MEM_FLAGS_ACCESS_RW) == 0) {
1025  PiTrace("Invalid memory attribute for piMemBufferCreate");
1026  return PI_ERROR_INVALID_OPERATION;
1027  }
1028 
1029  if (Context == nullptr) {
1030  return PI_ERROR_INVALID_CONTEXT;
1031  }
1032  if (RetMem == nullptr) {
1033  return PI_ERROR_INVALID_VALUE;
1034  }
1035 
1036  // Flag & HostPtr argument sanity check
1037  if (!Context->checkSurfaceArgument(Flags, HostPtr)) {
1038  return PI_ERROR_INVALID_OPERATION;
1039  }
1040 
1041  char *MapBasePtr = nullptr;
1042  cm_surface_ptr_t CmBuf;
1043  cm_support::SurfaceIndex *CmIndex = nullptr;
1044  int Status = cm_support::CM_FAILURE;
1045 
1046  if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) {
1048  Status = Context->Device->CmDevicePtr->CreateBufferUP(
1049  static_cast<unsigned int>(Size), HostPtr, CmBuf.UPBufPtr);
1050  CmBuf.UPBufPtr->GetIndex(CmIndex);
1051  } else {
1053  Status = Context->Device->CmDevicePtr->CreateBuffer(
1054  static_cast<unsigned int>(Size), CmBuf.RegularBufPtr);
1055  CmBuf.RegularBufPtr->GetIndex(CmIndex);
1056 
1057  if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1058  CmBuf.RegularBufPtr->WriteSurface(
1059  reinterpret_cast<const unsigned char *>(HostPtr), nullptr,
1060  static_cast<unsigned int>(Size));
1061  }
1062  }
1063 
1064  if (Status != cm_support::CM_SUCCESS) {
1065  return PI_ERROR_INVALID_OPERATION;
1066  }
1067 
1068  MapBasePtr =
1069  pi_cast<char *>(cm_support::get_surface_base_addr(CmIndex->get_data()));
1070 
1071  try {
1072  *RetMem =
1073  new _pi_buffer(Context, MapBasePtr, CmBuf, CmIndex->get_data(), Size);
1074  } catch (const std::bad_alloc &) {
1075  return PI_ERROR_OUT_OF_HOST_MEMORY;
1076  } catch (...) {
1077  return PI_ERROR_UNKNOWN;
1078  }
1079 
1080  std::lock_guard<std::mutex> Lock{*PiESimdSurfaceMapLock};
1081  if (PiESimdSurfaceMap->find((*RetMem)->SurfaceIndex) !=
1082  PiESimdSurfaceMap->end()) {
1083  PiTrace("Failure from CM-managed buffer creation");
1084  return PI_ERROR_INVALID_MEM_OBJECT;
1085  }
1086 
1087  (*PiESimdSurfaceMap)[(*RetMem)->SurfaceIndex] = *RetMem;
1088 
1089  return PI_SUCCESS;
1090 }
1091 
1092 pi_result piMemGetInfo(pi_mem, pi_mem_info, size_t, void *, size_t *) {
1094 }
1095 
1097  if (Mem == nullptr) {
1098  return PI_ERROR_INVALID_MEM_OBJECT;
1099  }
1100  ++(Mem->RefCount);
1101  return PI_SUCCESS;
1102 }
1103 
1105  if ((Mem == nullptr) || (Mem->RefCount == 0)) {
1106  return PI_ERROR_INVALID_MEM_OBJECT;
1107  }
1108 
1109  if (--(Mem->RefCount) == 0) {
1110  // Removing Surface-map entry
1111  std::lock_guard<std::mutex> Lock{*PiESimdSurfaceMapLock};
1112  auto MapEntryIt = PiESimdSurfaceMap->find(Mem->SurfaceIndex);
1113  if (MapEntryIt == PiESimdSurfaceMap->end()) {
1114  PiTrace("Failure from Buffer/Image deletion");
1115  return PI_ERROR_INVALID_MEM_OBJECT;
1116  }
1117  PiESimdSurfaceMap->erase(MapEntryIt);
1118  delete Mem;
1119  }
1120  return PI_SUCCESS;
1121 }
1122 
1124  int Status = cm_support::CM_FAILURE;
1125 
1126  cm_support::CmDevice *CmDevice = Context->Device->CmDevicePtr;
1127 
1129  Status = CmDevice->DestroyBufferUP(SurfacePtr.UPBufPtr);
1131  Status = CmDevice->DestroySurface(SurfacePtr.RegularBufPtr);
1133  Status = CmDevice->DestroySurface2DUP(SurfacePtr.UPImgPtr);
1135  Status = CmDevice->DestroySurface(SurfacePtr.RegularImgPtr);
1136  }
1137 
1138  sycl::detail::pi::assertion(Status == cm_support::CM_SUCCESS &&
1139  "Surface Deletion Failure from CM_EMU");
1140 
1141  for (auto mapit = Mappings.begin(); mapit != Mappings.end();) {
1142  mapit = Mappings.erase(mapit);
1143  }
1144 }
1145 
1146 cm_support::CM_SURFACE_FORMAT
1148  using ULongPair = std::pair<unsigned long, unsigned long>;
1149  using FmtMap = std::map<ULongPair, cm_support::CM_SURFACE_FORMAT>;
1150  static const FmtMap pi2cm = {
1152  cm_support::CM_SURFACE_FORMAT_A8R8G8B8},
1153 
1155  cm_support::CM_SURFACE_FORMAT_A8R8G8B8},
1156 
1158  cm_support::CM_SURFACE_FORMAT_A8R8G8B8},
1159 
1161  cm_support::CM_SURFACE_FORMAT_R32G32B32A32F},
1162  };
1163  auto Result = pi2cm.find(
1164  {PiFormat->image_channel_data_type, PiFormat->image_channel_order});
1165  if (Result != pi2cm.end()) {
1166  return Result->second;
1167  }
1168  return cm_support::CM_SURFACE_FORMAT_UNKNOWN;
1169 }
1170 
1172  const pi_image_format *ImageFormat,
1173  const pi_image_desc *ImageDesc, void *HostPtr,
1174  pi_mem *RetImage) {
1175  if ((Flags & PI_MEM_FLAGS_ACCESS_RW) == 0) {
1176  PiTrace("Invalid memory attribute for piMemImageCreate");
1177  return PI_ERROR_INVALID_OPERATION;
1178  }
1179 
1180  if (ImageFormat == nullptr || ImageDesc == nullptr)
1181  return PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR;
1182 
1183  switch (ImageDesc->image_type) {
1184  case PI_MEM_TYPE_IMAGE2D:
1185  break;
1186 
1192 
1193  default:
1194  return PI_ERROR_INVALID_MEM_OBJECT;
1195  }
1196 
1197  auto BytesPerPixel = 4;
1198  switch (ImageFormat->image_channel_data_type) {
1200  BytesPerPixel = 16;
1201  break;
1204  BytesPerPixel = 4;
1205  break;
1218  default:
1219  return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
1220  }
1221 
1222  // Flag & HostPtr argument sanity check
1223  if (!Context->checkSurfaceArgument(Flags, HostPtr)) {
1224  return PI_ERROR_INVALID_OPERATION;
1225  }
1226 
1227  cm_support::CM_SURFACE_FORMAT CmSurfFormat =
1228  ConvertPiImageFormatToCmFormat(ImageFormat);
1229  if (CmSurfFormat == cm_support::CM_SURFACE_FORMAT_UNKNOWN) {
1230  return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
1231  }
1232 
1233  char *MapBasePtr = nullptr;
1234  cm_surface_ptr_t CmImg;
1235  cm_support::SurfaceIndex *CmIndex = nullptr;
1236  int Status = cm_support::CM_SUCCESS;
1237 
1238  if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) {
1240  Status = Context->Device->CmDevicePtr->CreateSurface2DUP(
1241  static_cast<unsigned int>(ImageDesc->image_width),
1242  static_cast<unsigned int>(ImageDesc->image_height), CmSurfFormat,
1243  HostPtr, CmImg.UPImgPtr);
1244  CmImg.UPImgPtr->GetIndex(CmIndex);
1245  } else {
1247  Status = Context->Device->CmDevicePtr->CreateSurface2D(
1248  static_cast<unsigned int>(ImageDesc->image_width),
1249  static_cast<unsigned int>(ImageDesc->image_height), CmSurfFormat,
1250  CmImg.RegularImgPtr);
1251  CmImg.RegularImgPtr->GetIndex(CmIndex);
1252 
1253  if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1254  CmImg.RegularImgPtr->WriteSurface(
1255  reinterpret_cast<const unsigned char *>(HostPtr), nullptr,
1256  static_cast<unsigned int>(ImageDesc->image_width *
1257  ImageDesc->image_height * BytesPerPixel));
1258  }
1259  }
1260 
1261  if (Status != cm_support::CM_SUCCESS) {
1262  return PI_ERROR_INVALID_OPERATION;
1263  }
1264 
1265  MapBasePtr =
1266  pi_cast<char *>(cm_support::get_surface_base_addr(CmIndex->get_data()));
1267 
1268  try {
1269  *RetImage = new _pi_image(Context, MapBasePtr, CmImg, CmIndex->get_data(),
1270  ImageDesc->image_width, ImageDesc->image_height,
1271  BytesPerPixel);
1272  } catch (const std::bad_alloc &) {
1273  return PI_ERROR_OUT_OF_HOST_MEMORY;
1274  } catch (...) {
1275  return PI_ERROR_UNKNOWN;
1276  }
1277 
1278  std::lock_guard<std::mutex> Lock{*PiESimdSurfaceMapLock};
1279  if (PiESimdSurfaceMap->find((*RetImage)->SurfaceIndex) !=
1280  PiESimdSurfaceMap->end()) {
1281  PiTrace("Failure from CM-managed image creation");
1282  return PI_ERROR_INVALID_VALUE;
1283  }
1284 
1285  (*PiESimdSurfaceMap)[(*RetImage)->SurfaceIndex] = *RetImage;
1286 
1287  return PI_SUCCESS;
1288 }
1289 
1292 }
1293 
1295  pi_mem *) {
1297 }
1298 
1299 pi_result piProgramCreate(pi_context, const void *, size_t, pi_program *) {
1301 }
1302 
1304  const size_t *, const unsigned char **,
1305  size_t, const pi_device_binary_property *,
1306  pi_int32 *, pi_program *) {
1308 }
1309 
1311  const size_t *, const unsigned char **,
1312  pi_int32 *, pi_program *) {
1314 }
1315 
1317  const size_t *, pi_program *) {
1319 }
1320 
1322  size_t *) {
1324 }
1325 
1327  pi_uint32, const pi_program *,
1328  void (*)(pi_program, void *), void *, pi_program *) {
1330 }
1331 
1333  const char *, pi_uint32, const pi_program *,
1334  const char **, void (*)(pi_program, void *),
1335  void *) {
1337 }
1338 
1340  void (*)(pi_program, void *), void *) {
1342 }
1343 
1345  size_t, void *, size_t *) {
1347 }
1348 
1350 
1352 
1355 }
1356 
1358  pi_program *) {
1360 }
1361 
1364 }
1365 
1366 pi_result piKernelSetArg(pi_kernel, pi_uint32, size_t, const void *) {
1368 }
1369 
1372 }
1373 
1374 // Special version of piKernelSetArg to accept pi_sampler.
1377 }
1378 
1379 pi_result piKernelGetInfo(pi_kernel, pi_kernel_info, size_t, void *, size_t *) {
1381 }
1382 
1384  size_t, void *, size_t *) {
1386 }
1387 
1389  pi_kernel_sub_group_info, size_t,
1390  const void *, size_t, void *, size_t *) {
1392 }
1393 
1395 
1397 
1399 
1401  size_t ParamValueSize, void *ParamValue,
1402  size_t *ParamValueSizeRet) {
1403  if (ParamName != PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) {
1405  }
1406 
1407  auto CheckAndFillStatus = [&](const cm_support::CM_STATUS &State) {
1408  pi_int32 Result = PI_EVENT_RUNNING;
1409  if (State == cm_support::CM_STATUS_FINISHED)
1410  Result = PI_EVENT_COMPLETE;
1411  if (ParamValue) {
1412  if (ParamValueSize < sizeof(Result))
1413  return PI_ERROR_INVALID_VALUE;
1414  *static_cast<pi_int32 *>(ParamValue) = Result;
1415  }
1416  if (ParamValueSizeRet) {
1417  *ParamValueSizeRet = sizeof(Result);
1418  }
1419  return PI_SUCCESS;
1420  };
1421  // Dummy event is already completed ones done by CM.
1422  if (Event->IsDummyEvent)
1423  return CheckAndFillStatus(cm_support::CM_STATUS_FINISHED);
1424 
1425  if (Event->CmEventPtr == nullptr)
1426  return PI_ERROR_INVALID_EVENT;
1427 
1428  cm_support::CM_STATUS Status;
1429  int32_t Result = Event->CmEventPtr->GetStatus(Status);
1430  if (Result != cm_support::CM_SUCCESS)
1431  return PI_ERROR_COMMAND_EXECUTION_FAILURE;
1432 
1433  return CheckAndFillStatus(Status);
1434 }
1435 
1437  size_t ParamValueSize, void *ParamValue,
1438  size_t *ParamValueSizeRet) {
1439  ARG_UNUSED(Event);
1440  ARG_UNUSED(ParamName);
1441  ARG_UNUSED(ParamValueSize);
1442  ARG_UNUSED(ParamValue);
1443  ARG_UNUSED(ParamValueSizeRet);
1444 
1445  PiTrace("Warning : Profiling Not supported under PI_ESIMD_EMULATOR");
1446  return PI_SUCCESS;
1447 }
1448 
1449 pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
1450  for (int i = 0; i < (int)NumEvents; i++) {
1451  if (EventList[i]->IsDummyEvent) {
1452  // Dummy event is already completed ones done by CM. Skip
1453  // waiting.
1454  continue;
1455  }
1456  if (EventList[i]->CmEventPtr == nullptr) {
1457  return PI_ERROR_INVALID_EVENT;
1458  }
1459  int Result = EventList[i]->CmEventPtr->WaitForTaskFinished();
1460  if (Result != cm_support::CM_SUCCESS) {
1461  return PI_ERROR_OUT_OF_RESOURCES;
1462  }
1463  }
1464  return PI_SUCCESS;
1465 }
1466 
1468  void (*)(pi_event, pi_int32, void *), void *) {
1470 }
1471 
1473 
1475  if (Event == nullptr) {
1476  return PI_ERROR_INVALID_EVENT;
1477  }
1478 
1479  ++(Event->RefCount);
1480 
1481  return PI_SUCCESS;
1482 }
1483 
1485  if (Event == nullptr || (Event->RefCount <= 0)) {
1486  return PI_ERROR_INVALID_EVENT;
1487  }
1488 
1489  if (--(Event->RefCount) == 0) {
1490  if (!Event->IsDummyEvent) {
1491  if ((Event->CmEventPtr == nullptr) || (Event->OwnerQueue == nullptr)) {
1492  return PI_ERROR_INVALID_EVENT;
1493  }
1494  int Result = Event->OwnerQueue->DestroyEvent(Event->CmEventPtr);
1495  if (Result != cm_support::CM_SUCCESS) {
1496  return PI_ERROR_INVALID_EVENT;
1497  }
1498  }
1499  delete Event;
1500  }
1501 
1502  return PI_SUCCESS;
1503 }
1504 
1507 }
1508 
1510  pi_event *) {
1512 }
1514  pi_sampler *) {
1516 }
1517 
1519  size_t *) {
1521 }
1522 
1524 
1526 
1528  pi_event *) {
1530 }
1531 
1533  pi_event *) {
1535 }
1536 
1538  pi_bool BlockingRead, size_t Offset,
1539  size_t Size, void *Dst,
1540  pi_uint32 NumEventsInWaitList,
1541  const pi_event *EventWaitList,
1542  pi_event *Event) {
1543  ARG_UNUSED(Queue);
1544  ARG_UNUSED(EventWaitList);
1545 
1547  if (BlockingRead) {
1548  PiTrace(
1549  "ESIMD_EMULATOR support for blocking piEnqueueMemBufferRead is NYI");
1550  return PI_ERROR_INVALID_OPERATION;
1551  }
1552 
1553  if (Offset != 0) {
1554  PiTrace("ESIMD_EMULATOR does not support buffer reading with offsets");
1555  return PI_ERROR_INVALID_ARG_VALUE;
1556  }
1557 
1558  if (NumEventsInWaitList != 0) {
1559  return PI_ERROR_INVALID_EVENT_WAIT_LIST;
1560  }
1561 
1562  _pi_buffer *buf = static_cast<_pi_buffer *>(Src);
1563 
1564  std::unique_ptr<_pi_event> RetEv{nullptr};
1565  if (Event) {
1566  RetEv = std::unique_ptr<_pi_event>(new _pi_event());
1567  RetEv->IsDummyEvent = true;
1568  }
1569 
1571  // CM does not provide 'ReadSurface' call for 'User-Provided'
1572  // Surface. memcpy is used for BufferRead PI_API call.
1573  memcpy(Dst, buf->MapHostPtr, Size);
1574  } else {
1576  return PI_ERROR_INVALID_MEM_OBJECT;
1577  }
1578  int Status = buf->SurfacePtr.RegularBufPtr->ReadSurface(
1579  reinterpret_cast<unsigned char *>(Dst),
1580  nullptr, // event
1581  static_cast<uint64_t>(Size));
1582 
1583  if (Status != cm_support::CM_SUCCESS) {
1584  return PI_ERROR_INVALID_MEM_OBJECT;
1585  }
1586  }
1587 
1588  if (Event) {
1589  *Event = RetEv.release();
1590  }
1591 
1592  return PI_SUCCESS;
1593 }
1594 
1597  pi_buff_rect_region, size_t, size_t,
1598  size_t, size_t, void *, pi_uint32,
1599  const pi_event *, pi_event *) {
1601 }
1602 
1604  const void *, pi_uint32, const pi_event *,
1605  pi_event *) {
1607 }
1608 
1611  pi_buff_rect_region, size_t, size_t,
1612  size_t, size_t, const void *, pi_uint32,
1613  const pi_event *, pi_event *) {
1615 }
1616 
1618  size_t, pi_uint32, const pi_event *,
1619  pi_event *) {
1621 }
1622 
1625  pi_buff_rect_region, size_t, size_t,
1626  size_t, size_t, pi_uint32,
1627  const pi_event *, pi_event *) {
1629 }
1630 
1631 pi_result piEnqueueMemBufferFill(pi_queue, pi_mem, const void *, size_t, size_t,
1632  size_t, pi_uint32, const pi_event *,
1633  pi_event *) {
1635 }
1636 
1638  pi_bool BlockingMap, pi_map_flags MapFlags,
1639  size_t Offset, size_t Size,
1640  pi_uint32 NumEventsInWaitList,
1641  const pi_event *EventWaitList, pi_event *Event,
1642  void **RetMap) {
1643  ARG_UNUSED(Queue);
1644  ARG_UNUSED(BlockingMap);
1645  ARG_UNUSED(MapFlags);
1646  ARG_UNUSED(NumEventsInWaitList);
1647  ARG_UNUSED(EventWaitList);
1648 
1649  std::unique_ptr<_pi_event> RetEv{nullptr};
1650  pi_result ret = PI_SUCCESS;
1651 
1652  if (Event) {
1653  RetEv = std::unique_ptr<_pi_event>(new _pi_event());
1654  RetEv->IsDummyEvent = true;
1655  }
1656 
1657  // Real mapping does not occur here and CPU-accessible address is
1658  // returned as the actual memory space for the buffer is located in
1659  // CPU memory and the plug-in know its base address
1660  // ('_pi_mem::MapHostPtr')
1661  *RetMap = MemObj->MapHostPtr + Offset;
1662 
1663  {
1664  std::lock_guard<std::mutex> Lock{MemObj->MappingsMutex};
1665  auto Res = MemObj->Mappings.insert({*RetMap, {Offset, Size}});
1666  // False as the second value in pair means that mapping was not inserted
1667  // because mapping already exists.
1668  if (!Res.second) {
1669  ret = PI_ERROR_INVALID_VALUE;
1670  PiTrace("piEnqueueMemBufferMap: duplicate mapping detected");
1671  }
1672  }
1673 
1674  if (Event) {
1675  *Event = RetEv.release();
1676  }
1677  return ret;
1678 }
1679 
1680 pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
1681  pi_uint32 NumEventsInWaitList,
1682  const pi_event *EventWaitList, pi_event *Event) {
1683  ARG_UNUSED(Queue);
1684  ARG_UNUSED(NumEventsInWaitList);
1685  ARG_UNUSED(EventWaitList);
1686 
1687  std::unique_ptr<_pi_event> RetEv{nullptr};
1688  pi_result ret = PI_SUCCESS;
1689 
1690  if (Event) {
1691  RetEv = std::unique_ptr<_pi_event>(new _pi_event());
1692  RetEv->IsDummyEvent = true;
1693  }
1694 
1695  // Real unmapping does not occur here and CPU-accessible address is
1696  // returned as the actual memory space for the buffer is located in
1697  // CPU memory and the plug-in knows its base address
1698  // ('_pi_mem::MapHostPtr')
1699  {
1700  std::lock_guard<std::mutex> Lock(MemObj->MappingsMutex);
1701  auto It = MemObj->Mappings.find(MappedPtr);
1702  if (It == MemObj->Mappings.end()) {
1703  ret = PI_ERROR_INVALID_VALUE;
1704  PiTrace("piEnqueueMemUnmap: unknown memory mapping");
1705  }
1706  MemObj->Mappings.erase(It);
1707  }
1708 
1709  if (Event) {
1710  *Event = RetEv.release();
1711  }
1712 
1713  return ret;
1714 }
1715 
1716 pi_result piMemImageGetInfo(pi_mem, pi_image_info, size_t, void *, size_t *) {
1718 }
1719 
1721  pi_bool BlockingRead, pi_image_offset Origin,
1722  pi_image_region Region, size_t RowPitch,
1723  size_t SlicePitch, void *Ptr,
1724  pi_uint32 NumEventsInWaitList,
1725  const pi_event *EventWaitList,
1726  pi_event *Event) {
1727  ARG_UNUSED(CommandQueue);
1728  ARG_UNUSED(NumEventsInWaitList);
1729  ARG_UNUSED(EventWaitList);
1730 
1732  if (BlockingRead) {
1733  PiTrace("ESIMD_EMULATOR support for blocking piEnqueueMemImageRead is NYI");
1734  return PI_ERROR_INVALID_OPERATION;
1735  }
1736 
1737  // SlicePitch is for 3D image while ESIMD_EMULATOR does not
1738  // support. For 2D surfaces, SlicePitch must be 0.
1739  if (SlicePitch != 0) {
1740  PiTrace("ESIMD_EMULATOR does not support 3D-image");
1741  return PI_ERROR_INVALID_ARG_VALUE;
1742  }
1743 
1744  // CM_EMU does not support ReadSurface with offset
1745  if (Origin->x != 0 || Origin->y != 0 || Origin->z != 0) {
1746  PiTrace("ESIMD_EMULATOR does not support 2D-image reading with offsets");
1747  return PI_ERROR_INVALID_ARG_VALUE;
1748  }
1749 
1750  _pi_image *PiImg = static_cast<_pi_image *>(Image);
1751 
1752  std::unique_ptr<_pi_event> RetEv{nullptr};
1753 
1754  if (Event) {
1755  RetEv = std::unique_ptr<_pi_event>(new _pi_event());
1756  RetEv->IsDummyEvent = true;
1757  }
1758 
1759  size_t Size = RowPitch * (Region->height);
1761  // CM does not provide 'ReadSurface' call for 'User-Provided'
1762  // Surface. memcpy is used for ImageRead PI_API call.
1763  memcpy(Ptr, PiImg->MapHostPtr, Size);
1764  } else {
1766  return PI_ERROR_INVALID_MEM_OBJECT;
1767  }
1768  int Status = PiImg->SurfacePtr.RegularImgPtr->ReadSurface(
1769  reinterpret_cast<unsigned char *>(Ptr),
1770  nullptr, // event
1771  static_cast<uint64_t>(Size));
1772 
1773  if (Status != cm_support::CM_SUCCESS) {
1774  return PI_ERROR_INVALID_MEM_OBJECT;
1775  }
1776  }
1777 
1778  if (Event) {
1779  *Event = RetEv.release();
1780  }
1781 
1782  return PI_SUCCESS;
1783 }
1784 
1786  pi_image_region, size_t, size_t, const void *,
1787  pi_uint32, const pi_event *, pi_event *) {
1789 }
1790 
1793  const pi_event *, pi_event *) {
1795 }
1796 
1797 pi_result piEnqueueMemImageFill(pi_queue, pi_mem, const void *, const size_t *,
1798  const size_t *, pi_uint32, const pi_event *,
1799  pi_event *) {
1801 }
1802 
1804  void *, pi_mem *) {
1806 }
1807 
1808 pi_result
1810  const size_t *GlobalWorkOffset,
1811  const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
1812  pi_uint32 NumEventsInWaitList,
1813  const pi_event *EventWaitList, pi_event *Event) {
1814  ARG_UNUSED(Queue);
1815  ARG_UNUSED(NumEventsInWaitList);
1816  ARG_UNUSED(EventWaitList);
1817 
1818  const size_t LocalWorkSz[] = {1, 1, 1};
1819 
1820  if (Kernel == nullptr) {
1821  return PI_ERROR_INVALID_KERNEL;
1822  }
1823 
1824  if (WorkDim > 3 || WorkDim == 0) {
1825  return PI_ERROR_INVALID_WORK_GROUP_SIZE;
1826  }
1827 
1828  if (isNull(WorkDim, LocalWorkSize)) {
1829  LocalWorkSize = LocalWorkSz;
1830  }
1831 
1832  for (pi_uint32 I = 0; I < WorkDim; I++) {
1833  if ((GlobalWorkSize[I] % LocalWorkSize[I]) != 0) {
1834  return PI_ERROR_INVALID_WORK_GROUP_SIZE;
1835  }
1836  }
1837 
1838  std::unique_ptr<_pi_event> RetEv{nullptr};
1839 
1840  if (Event) {
1841  RetEv = std::unique_ptr<_pi_event>(new _pi_event());
1842  RetEv->IsDummyEvent = true;
1843  }
1844 
1845  switch (WorkDim) {
1846  case 1:
1847  InvokeImpl<1>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize,
1848  LocalWorkSize);
1849  break;
1850  case 2:
1851  InvokeImpl<2>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize,
1852  LocalWorkSize);
1853  break;
1854  case 3:
1855  InvokeImpl<3>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize,
1856  LocalWorkSize);
1857  break;
1858  default:
1860  break;
1861  }
1862 
1863  if (Event) {
1864  *Event = RetEv.release();
1865  }
1866 
1867  return PI_SUCCESS;
1868 }
1869 
1871  pi_program, bool, pi_kernel *) {
1873 }
1874 
1877 }
1878 
1879 pi_result piEnqueueNativeKernel(pi_queue, void (*)(void *), void *, size_t,
1880  pi_uint32, const pi_mem *, const void **,
1881  pi_uint32, const pi_event *, pi_event *) {
1883 }
1884 
1886  pi_uint64 *) {
1888 }
1889 
1891  size_t, pi_uint32) {
1893 }
1894 
1896  pi_usm_mem_properties *, size_t, pi_uint32) {
1898 }
1899 
1900 pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context,
1901  pi_device Device,
1902  pi_usm_mem_properties *Properties, size_t Size,
1903  pi_uint32 Alignment) {
1904  ARG_UNUSED(Properties);
1905  ARG_UNUSED(Alignment);
1906 
1907  if (Context == nullptr || (Device != Context->Device)) {
1908  return PI_ERROR_INVALID_CONTEXT;
1909  }
1910 
1911  if (ResultPtr == nullptr) {
1912  return PI_ERROR_INVALID_OPERATION;
1913  }
1914 
1915  // 'Size' must be power of two in order to prevent memory corruption
1916  // error
1917  if ((Size & (Size - 1)) != 0) {
1918  Size = sycl::detail::getNextPowerOfTwo(Size);
1919  }
1920 
1921  cm_support::CmBufferSVM *Buf = nullptr;
1922  void *SystemMemPtr = nullptr;
1923  int32_t Result = Context->Device->CmDevicePtr->CreateBufferSVM(
1924  Size, SystemMemPtr, CM_SVM_ACCESS_FLAG_DEFAULT, Buf);
1925 
1926  if (Result != cm_support::CM_SUCCESS) {
1927  return PI_ERROR_OUT_OF_HOST_MEMORY;
1928  }
1929  *ResultPtr = SystemMemPtr;
1930  std::lock_guard<std::mutex> Lock(Context->Addr2CmBufferSVMLock);
1931  auto Iter = Context->Addr2CmBufferSVM.find(SystemMemPtr);
1932  if (Context->Addr2CmBufferSVM.end() != Iter) {
1933  return PI_ERROR_INVALID_MEM_OBJECT;
1934  }
1935  Context->Addr2CmBufferSVM[SystemMemPtr] = Buf;
1936  return PI_SUCCESS;
1937 }
1938 
1939 pi_result piextUSMFree(pi_context Context, void *Ptr) {
1940  if (Context == nullptr) {
1941  return PI_ERROR_INVALID_CONTEXT;
1942  }
1943  if (Ptr == nullptr) {
1944  return PI_ERROR_INVALID_OPERATION;
1945  }
1946 
1947  std::lock_guard<std::mutex> Lock(Context->Addr2CmBufferSVMLock);
1948  cm_support::CmBufferSVM *Buf = Context->Addr2CmBufferSVM[Ptr];
1949  if (Buf == nullptr) {
1950  return PI_ERROR_INVALID_MEM_OBJECT;
1951  }
1952  auto Count = Context->Addr2CmBufferSVM.erase(Ptr);
1953  if (Count != 1) {
1954  return PI_ERROR_INVALID_MEM_OBJECT;
1955  }
1956  int32_t Result = Context->Device->CmDevicePtr->DestroyBufferSVM(Buf);
1957  if (cm_support::CM_SUCCESS != Result) {
1958  return PI_ERROR_UNKNOWN;
1959  }
1960  return PI_SUCCESS;
1961 }
1962 
1965 }
1966 
1968  const pi_event *, pi_event *) {
1970 }
1971 
1972 pi_result piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, const void *, size_t,
1973  pi_uint32, const pi_event *, pi_event *) {
1975 }
1976 
1978  pi_mem_advice, pi_event *) {
1980 }
1981 
1982 pi_result piextUSMEnqueueFill2D(pi_queue, void *, size_t, size_t, const void *,
1983  size_t, size_t, pi_uint32, const pi_event *,
1984  pi_event *) {
1986 }
1987 
1988 pi_result piextUSMEnqueueMemset2D(pi_queue, void *, size_t, int, size_t, size_t,
1989  pi_uint32, const pi_event *, pi_event *) {
1991 }
1992 
1994  const void *, size_t, size_t, size_t,
1995  pi_uint32, const pi_event *, pi_event *) {
1997 }
1998 
2000  size_t, void *, size_t *) {
2002 }
2003 
2005  const void *) {
2007 }
2008 
2010  const void *) {
2012 }
2013 
2015  pi_uint32 RawImgSize, pi_uint32 *ImgInd) {
2018  if (RawImgSize != 1) {
2019  PiTrace("Only single device binary image is supported in ESIMD_EMULATOR");
2020  return PI_ERROR_INVALID_VALUE;
2021  }
2022  *ImgInd = 0;
2023  return PI_SUCCESS;
2024 }
2025 
2028  const pi_event *, pi_event *) {
2030 }
2031 
2033  const char *, pi_bool, size_t,
2034  size_t, const void *, pi_uint32,
2035  const pi_event *, pi_event *) {
2037 }
2038 
2040  const char *, pi_bool, size_t,
2041  size_t, void *, pi_uint32,
2042  const pi_event *, pi_event *) {
2044 }
2045 
2046 pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) {
2047  *OpaqueDataReturn = reinterpret_cast<void *>(PiESimdDeviceAccess);
2048  return PI_SUCCESS;
2049 }
2050 
2052  delete reinterpret_cast<sycl::detail::ESIMDEmuPluginOpaqueData *>(
2053  PiESimdDeviceAccess->data);
2054  delete PiESimdDeviceAccess;
2055 
2056  for (auto it = PiESimdSurfaceMap->begin(); it != PiESimdSurfaceMap->end();) {
2057  auto Mem = it->second;
2058  if (Mem != nullptr) {
2059  delete Mem;
2060  } // else { /* Null-entry for SLM_BTI */ }
2061  it = PiESimdSurfaceMap->erase(it);
2062  }
2063  return PI_SUCCESS;
2064 }
2065 
2066 pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime,
2067  uint64_t *hostTime) {
2068  PiTrace(
2069  "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR");
2070  return PI_SUCCESS;
2071 }
2073 
2075  if (PluginInit == nullptr) {
2076  return PI_ERROR_INVALID_VALUE;
2077  }
2078 
2079  // Check that the major version matches in PiVersion and SupportedVersion
2081 
2082  size_t PluginVersionSize = sizeof(PluginInit->PluginVersion);
2083  if (strlen(_PI_H_VERSION_STRING) >= PluginVersionSize) {
2084  return PI_ERROR_INVALID_VALUE;
2085  }
2086  strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize);
2087 
2088  PiESimdDeviceAccess = new sycl::detail::ESIMDEmuPluginOpaqueData();
2089  // 'version' to be compared with 'ESIMD_EMULATOR_DEVICE_REQUIRED_VER' defined
2090  // in device interface file
2092  PiESimdDeviceAccess->data =
2093  reinterpret_cast<void *>(new sycl::detail::ESIMDDeviceInterface());
2094 
2095  // Registering pre-defined surface index dedicated for SLM
2096  (*PiESimdSurfaceMap)[__ESIMD_DNS::SLM_BTI] = nullptr;
2097 
2098 #define _PI_API(api) \
2099  (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api);
2100 #include <sycl/detail/pi.def>
2101 
2102  return PI_SUCCESS;
2103 }
2104 
2105 } // extern C
void runIterationSpace(const sycl::range< DIMS > &LocalSize, const sycl::range< DIMS > &GlobalSize, const sycl::id< DIMS > &GlobalOffset)
libCMBatch(const KernelFunc< DIMS > &Kernel)
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:64
__ESIMD_API void fence()
esimd::fence sets the memory read/write order.
Definition: memory.hpp:1096
__ESIMD_API void split_barrier()
Generic work-group split barrier.
Definition: memory.hpp:28
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:1109
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
void die(const char *Message)
Definition: pi.cpp:542
void assertion(bool Condition, const char *Message=nullptr)
Definition: pi.cpp:547
constexpr size_t getNextPowerOfTwo(size_t Var)
Definition: common.hpp:427
void memcpy(void *Dst, const void *Src, std::size_t Size)
static constexpr SurfaceIndex SLM_BTI
Definition: common.hpp:98
pi_bitfield pi_device_exec_capabilities
Definition: pi.h:538
int32_t pi_int32
Definition: pi.h:123
uintptr_t pi_native_handle
Definition: pi.h:128
pi_bitfield pi_map_flags
Definition: pi.h:585
_pi_result
Definition: pi.h:135
pi_uint32 pi_bool
Definition: pi.h:126
#define _PI_H_VERSION_STRING
Definition: pi.h:96
pi_bitfield pi_usm_mem_properties
Definition: pi.h:597
_pi_device_info
Definition: pi.h:204
@ PI_DEVICE_INFO_PRINTF_BUFFER_SIZE
Definition: pi.h:274
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR
Definition: pi.h:215
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D
Definition: pi.h:321
@ PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
Definition: pi.h:299
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:296
@ PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC
Definition: pi.h:275
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT
Definition: pi.h:226
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:302
@ PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE
Definition: pi.h:210
@ PI_DEVICE_INFO_IMAGE_SRGB
Definition: pi.h:300
@ PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH
Definition: pi.h:237
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT
Definition: pi.h:216
@ PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT
Definition: pi.h:238
@ PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY
Definition: pi.h:229
@ PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT
Definition: pi.h:286
@ PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS
Definition: pi.h:208
@ PI_DEVICE_INFO_EXECUTION_CAPABILITIES
Definition: pi.h:260
@ PI_DEVICE_INFO_QUEUE_PROPERTIES
Definition: pi.h:214
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:294
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D
Definition: pi.h:322
@ PI_DEVICE_INFO_COMPILER_AVAILABLE
Definition: pi.h:258
@ PI_DEVICE_INFO_IMAGE_SUPPORT
Definition: pi.h:232
@ PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT
Definition: pi.h:253
@ PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES
Definition: pi.h:278
@ PI_DEVICE_INFO_BUILT_IN_KERNELS
Definition: pi.h:263
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT
Definition: pi.h:219
@ PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS
Definition: pi.h:281
@ PI_DEVICE_INFO_AVAILABLE
Definition: pi.h:257
@ PI_DEVICE_INFO_HALF_FP_CONFIG
Definition: pi.h:212
@ PI_DEVICE_INFO_PARENT_DEVICE
Definition: pi.h:276
@ PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL
Definition: pi.h:283
@ PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
Definition: pi.h:314
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
Definition: pi.h:295
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT
Definition: pi.h:223
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE
Definition: pi.h:227
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE
Definition: pi.h:245
@ PI_DEVICE_INFO_DRIVER_VERSION
Definition: pi.h:269
@ PI_DEVICE_INFO_MAX_PARAMETER_SIZE
Definition: pi.h:243
@ PI_DEVICE_INFO_PARTITION_PROPERTIES
Definition: pi.h:277
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT
Definition: pi.h:217
@ PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT
Definition: pi.h:287
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF
Definition: pi.h:221
@ PI_DEVICE_INFO_VERSION
Definition: pi.h:271
@ PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH
Definition: pi.h:239
@ PI_DEVICE_INFO_MAX_COMPUTE_UNITS
Definition: pi.h:207
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR
Definition: pi.h:222
@ PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT
Definition: pi.h:236
@ PI_DEVICE_INFO_VENDOR_ID
Definition: pi.h:206
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE
Definition: pi.h:246
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG
Definition: pi.h:218
@ PI_DEVICE_INFO_PLATFORM
Definition: pi.h:264
@ PI_DEVICE_INFO_LOCAL_MEM_SIZE
Definition: pi.h:252
@ PI_DEVICE_INFO_REFERENCE_COUNT
Definition: pi.h:265
@ PI_DEVICE_INFO_MAX_SAMPLERS
Definition: pi.h:242
@ PI_DEVICE_INFO_DEVICE_ID
Definition: pi.h:292
@ PI_DEVICE_INFO_PROFILE
Definition: pi.h:270
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
Definition: pi.h:209
@ PI_DEVICE_INFO_EXTENSIONS
Definition: pi.h:273
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT
Definition: pi.h:224
@ PI_DEVICE_INFO_HOST_UNIFIED_MEMORY
Definition: pi.h:254
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:293
@ PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE
Definition: pi.h:231
@ PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE
Definition: pi.h:247
@ PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE
Definition: pi.h:241
@ PI_DEVICE_INFO_LINKER_AVAILABLE
Definition: pi.h:259
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:323
@ PI_DEVICE_INFO_ADDRESS_BITS
Definition: pi.h:230
@ PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE
Definition: pi.h:220
@ PI_DEVICE_INFO_ATOMIC_64
Definition: pi.h:313
@ PI_DEVICE_INFO_USM_HOST_SUPPORT
Definition: pi.h:284
@ PI_DEVICE_INFO_USM_DEVICE_SUPPORT
Definition: pi.h:285
@ PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
Definition: pi.h:312
@ PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
Definition: pi.h:282
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:298
@ PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS
Definition: pi.h:234
@ PI_DEVICE_INFO_IL_VERSION
Definition: pi.h:266
@ PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION
Definition: pi.h:255
@ PI_DEVICE_INFO_VENDOR
Definition: pi.h:268
@ PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN
Definition: pi.h:244
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG
Definition: pi.h:225
@ PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS
Definition: pi.h:233
@ PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN
Definition: pi.h:279
@ PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT
Definition: pi.h:288
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS
Definition: pi.h:320
@ PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE
Definition: pi.h:240
@ PI_DEVICE_INFO_DOUBLE_FP_CONFIG
Definition: pi.h:213
@ PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE
Definition: pi.h:249
@ PI_DEVICE_INFO_PARTITION_TYPE
Definition: pi.h:280
@ PI_DEVICE_INFO_TYPE
Definition: pi.h:205
@ PI_DEVICE_INFO_SINGLE_FP_CONFIG
Definition: pi.h:211
@ PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF
Definition: pi.h:228
@ PI_DEVICE_INFO_GLOBAL_MEM_SIZE
Definition: pi.h:248
@ PI_DEVICE_INFO_NAME
Definition: pi.h:267
@ PI_DEVICE_INFO_LOCAL_MEM_TYPE
Definition: pi.h:251
@ PI_DEVICE_INFO_MAX_CONSTANT_ARGS
Definition: pi.h:250
@ PI_DEVICE_INFO_OPENCL_C_VERSION
Definition: pi.h:272
@ PI_DEVICE_INFO_ENDIAN_LITTLE
Definition: pi.h:256
@ PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH
Definition: pi.h:235
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
Definition: pi.h:297
static constexpr pi_device_fp_config PI_FP_DENORM
Definition: pi.h:683
_pi_queue_info
Definition: pi.h:354
_pi_mem_advice
Definition: pi.h:455
@ PI_IMAGE_CHANNEL_TYPE_FLOAT
Definition: pi.h:504
@ PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565
Definition: pi.h:494
@ PI_IMAGE_CHANNEL_TYPE_UNORM_INT8
Definition: pi.h:492
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16
Definition: pi.h:498
@ PI_IMAGE_CHANNEL_TYPE_SNORM_INT16
Definition: pi.h:491
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
Definition: pi.h:500
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8
Definition: pi.h:497
@ PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010
Definition: pi.h:496
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
Definition: pi.h:502
@ PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
Definition: pi.h:501
@ PI_IMAGE_CHANNEL_TYPE_SNORM_INT8
Definition: pi.h:490
@ PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555
Definition: pi.h:495
@ PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32
Definition: pi.h:499
@ PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT
Definition: pi.h:503
@ PI_IMAGE_CHANNEL_TYPE_UNORM_INT16
Definition: pi.h:493
_pi_kernel_sub_group_info
Definition: pi.h:396
pi_bitfield pi_mem_properties
Definition: pi.h:591
_pi_kernel_group_info
Definition: pi.h:375
_pi_kernel_exec_info
Definition: pi.h:1355
_pi_platform_info
Definition: pi.h:150
@ PI_PLATFORM_INFO_VENDOR
Definition: pi.h:154
@ PI_PLATFORM_INFO_EXTENSIONS
Definition: pi.h:151
@ PI_PLATFORM_INFO_PROFILE
Definition: pi.h:153
@ PI_PLATFORM_INFO_NAME
Definition: pi.h:152
@ PI_PLATFORM_INFO_VERSION
Definition: pi.h:155
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:614
_pi_image_info
Definition: pi.h:386
@ PI_USM_ATOMIC_ACCESS
Definition: pi.h:1668
@ PI_USM_ACCESS
Definition: pi.h:1667
@ PI_USM_CONCURRENT_ACCESS
Definition: pi.h:1669
@ PI_USM_CONCURRENT_ATOMIC_ACCESS
Definition: pi.h:1670
@ PI_IMAGE_CHANNEL_ORDER_RGBA
Definition: pi.h:477
@ PI_IMAGE_CHANNEL_ORDER_ARGB
Definition: pi.h:479
_pi_profiling_info
Definition: pi.h:564
_pi_usm_migration_flags
Definition: pi.h:1690
_pi_device_type
Definition: pi.h:182
@ PI_DEVICE_TYPE_GPU
A PI device that is a GPU.
Definition: pi.h:187
@ PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE
Definition: pi.h:196
uint64_t pi_uint64
Definition: pi.h:125
pi_bitfield pi_queue_properties
Definition: pi.h:610
pi_bitfield pi_sampler_properties
Definition: pi.h:544
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE
Definition: pi.h:616
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY
Definition: pi.h:581
_pi_buffer_create_type
Definition: pi.h:507
static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST
Definition: pi.h:685
uint32_t pi_uint32
Definition: pi.h:124
_pi_event_info
Definition: pi.h:403
@ PI_EVENT_INFO_COMMAND_EXECUTION_STATUS
Definition: pi.h:407
static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF
Definition: pi.h:687
intptr_t pi_context_properties
Definition: pi.h:536
pi_bitfield pi_mem_flags
Definition: pi.h:575
constexpr pi_queue_properties PI_QUEUE_FLAGS
Definition: pi.h:611
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:580
void(* pi_context_extended_deleter)(void *user_data)
Definition: pi.h:1112
@ PI_MEM_TYPE_IMAGE1D_BUFFER
Definition: pi.h:452
@ PI_MEM_TYPE_IMAGE1D
Definition: pi.h:450
@ PI_MEM_TYPE_IMAGE1D_ARRAY
Definition: pi.h:451
@ PI_MEM_TYPE_IMAGE2D
Definition: pi.h:447
@ PI_MEM_TYPE_IMAGE2D_ARRAY
Definition: pi.h:449
@ PI_MEM_TYPE_IMAGE3D
Definition: pi.h:448
_pi_sampler_info
Definition: pi.h:512
static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
Definition: pi.h:690
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:577
_pi_kernel_info
Definition: pi.h:366
_pi_mem_alloc_info
Definition: pi.h:1673
@ PI_DEVICE_LOCAL_MEM_TYPE_LOCAL
Definition: pi.h:200
_pi_context_info
Definition: pi.h:339
@ PI_EVENT_COMPLETE
Definition: pi.h:144
@ PI_EVENT_RUNNING
Definition: pi.h:145
static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO
Definition: pi.h:686
static constexpr pi_device_fp_config PI_FP_FMA
Definition: pi.h:688
#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION)
Definition: pi.h:102
_pi_program_info
Definition: pi.h:327
intptr_t pi_device_partition_property
Definition: pi.h:654
constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL
Definition: pi.h:539
static constexpr pi_device_fp_config PI_FP_INF_NAN
Definition: pi.h:684
_pi_mem_info
Definition: pi.h:976
pi_bitfield pi_device_affinity_domain
Definition: pi.h:667
_pi_program_build_info
Definition: pi.h:158
pi_result piSamplerRetain(pi_sampler)
void sycl_get_cm_image_params(unsigned int IndexInput, char **BaseAddr, uint32_t *Width, uint32_t *Height, uint32_t *Bpp, std::mutex **ImgMtxLock)
pi_result piextGetDeviceFunctionPointer(pi_device, pi_program, const char *, pi_uint64 *)
Retrieves a device function pointer to a user-defined function.
pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
pi_result piclProgramCreateWithSource(pi_context, pi_uint32, const char **, const size_t *, pi_program *)
pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *properties)
pi_result piMemRelease(pi_mem Mem)
thread_local char ErrorMessage[MaxMessageSize]
static sycl::detail::ESIMDEmuPluginOpaqueData * PiESimdDeviceAccess
pi_result piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, const pi_event *, pi_event *)
pi_result piextPlatformGetNativeHandle(pi_platform, pi_native_handle *)
Gets the native handle of a PI platform object.
pi_result piProgramGetBuildInfo(pi_program, pi_device, pi_program_build_info, size_t, void *, size_t *)
cm_support::CM_SURFACE_FORMAT ConvertPiImageFormatToCmFormat(const pi_image_format *PiFormat)
pi_result piSamplerRelease(pi_sampler)
pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event)
pi_result piDevicePartition(pi_device, const pi_device_partition_property *, pi_uint32, pi_device *, pi_uint32 *)
static std::unordered_map< unsigned int, _pi_mem * > * PiESimdSurfaceMap
pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32, const pi_mem *)
pi_result piEnqueueMemBufferCopyRect(pi_queue, pi_mem, pi_mem, pi_buff_rect_offset, pi_buff_rect_offset, pi_buff_rect_region, size_t, size_t, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
pi_result piQueueFinish(pi_queue)
pi_result piQueueRetain(pi_queue Queue)
pi_result piQueueRelease(pi_queue Queue)
pi_result piEnqueueMemImageFill(pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *)
pi_result piQueueCreate(pi_context Context, pi_device Device, pi_queue_properties Properties, pi_queue *Queue)
pi_result piMemBufferPartition(pi_mem, pi_mem_flags, pi_buffer_create_type, void *, pi_mem *)
pi_result piextProgramSetSpecializationConstant(pi_program, pi_uint32, size_t, const void *)
Sets a specialization constant to a specific value.
static std::mutex * PiESimdSurfaceMapLock
pi_result piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event)
pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, pi_bool BlockingRead, size_t Offset, size_t Size, void *Dst, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event)
pi_result piEnqueueMemBufferWrite(pi_queue, pi_mem, pi_bool, size_t, size_t, const void *, pi_uint32, const pi_event *, pi_event *)
pi_result piQueueGetInfo(pi_queue, pi_queue_info, size_t, void *, size_t *)
pi_result piKernelCreate(pi_program, const char *, pi_kernel *)
pi_result piextUSMEnqueuePrefetch(pi_queue, const void *, size_t, pi_usm_migration_flags, pi_uint32, const pi_event *, pi_event *)
Hint to migrate memory to the device.
pi_result piclProgramCreateWithBinary(pi_context, pi_uint32, const pi_device *, const size_t *, const unsigned char **, pi_int32 *, pi_program *)
thread_local pi_result ErrorMessageCode
pi_result piEnqueueNativeKernel(pi_queue, void(*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *)
pi_result piDeviceRelease(pi_device Device)
pi_result piextUSMFree(pi_context Context, void *Ptr)
Indicates that the allocated USM memory is no longer needed on the runtime side.
pi_result piMemRetain(pi_mem Mem)
pi_result piextDeviceSelectBinary(pi_device, pi_device_binary *, pi_uint32 RawImgSize, pi_uint32 *ImgInd)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
#define CONTINUE_NO_IMPLEMENTATION
#define ESIMDEmuPluginInterfaceVersion
pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue, pi_program, const char *, pi_bool, size_t, size_t, void *, pi_uint32, const pi_event *, pi_event *)
API reading data from a device global variable to host.
pi_result piTearDown(void *)
API to notify that the plugin should clean up its resources.
pi_result piProgramGetInfo(pi_program, pi_program_info, size_t, void *, size_t *)
pi_result piKernelGetGroupInfo(pi_kernel, pi_device, pi_kernel_group_info, size_t, void *, size_t *)
pi_result piEnqueueMemBufferCopy(pi_queue, pi_mem, pi_mem, size_t, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue)
static void PiTrace(std::string TraceString)
pi_result piextContextGetNativeHandle(pi_context, pi_native_handle *)
Gets the native handle of a PI context object.
pi_result piMemImageGetInfo(pi_mem, pi_image_info, size_t, void *, size_t *)
pi_result piProgramCreateWithBinary(pi_context, pi_uint32, const pi_device *, const size_t *, const unsigned char **, size_t, const pi_device_binary_property *, pi_int32 *, pi_program *)
Creates a PI program for a context and loads the given binary into it.
constexpr size_t MaxMessageSize
pi_result piEnqueueMemBufferFill(pi_queue, pi_mem, const void *, size_t, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
pi_result piextContextSetExtendedDeleter(pi_context, pi_context_extended_deleter, void *)
pi_result piEventRelease(pi_event Event)
pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet)
pi_result piextPlatformCreateWithNativeHandle(pi_native_handle, pi_platform *)
Creates PI platform object from a native handle.
pi_result piPluginInit(pi_plugin *PluginInit)
pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet)
pi_result piMemGetInfo(pi_mem, pi_mem_info, size_t, void *, size_t *)
pi_result piEnqueueEventsWait(pi_queue, pi_uint32, const pi_event *, pi_event *)
pi_result piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, const void *, size_t, pi_uint32, const pi_event *, pi_event *)
USM Memcpy API.
pi_result piextUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, pi_uint32, const pi_event *, pi_event *)
USM Memset API.
pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment)
Allocates memory accessible on both host and device.
unsigned int sycl_get_cm_surface_index(void *PtrInput)
static bool isNull(int NDims, const size_t *R)
Implementation for Host Kernel Launch used by piEnqueueKernelLaunch.
pi_result piextContextCreateWithNativeHandle(pi_native_handle, pi_uint32, const pi_device *, bool, pi_context *)
Creates PI context object from a native handle.
pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime)
Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the h...
pi_result piKernelRetain(pi_kernel)
pi_result piextUSMEnqueueMemAdvise(pi_queue, const void *, size_t, pi_mem_advice, pi_event *)
USM Memadvise API.
pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem MemObj, pi_bool BlockingMap, pi_map_flags MapFlags, size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event, void **RetMap)
const char SupportedVersion[]
pi_result piContextRelease(pi_context Context)
pi_result piEnqueueMemBufferReadRect(pi_queue, pi_mem, pi_bool, pi_buff_rect_offset, pi_buff_rect_offset, pi_buff_rect_region, size_t, size_t, size_t, size_t, void *, pi_uint32, const pi_event *, pi_event *)
pi_result piKernelSetArg(pi_kernel, pi_uint32, size_t, const void *)
pi_result piEnqueueMemImageWrite(pi_queue, pi_mem, pi_bool, pi_image_offset, pi_image_region, size_t, size_t, const void *, pi_uint32, const pi_event *, pi_event *)
pi_result piextProgramGetNativeHandle(pi_program, pi_native_handle *)
Gets the native handle of a PI program object.
pi_result piKernelGetInfo(pi_kernel, pi_kernel_info, size_t, void *, size_t *)
pi_result piContextGetInfo(pi_context, pi_context_info, size_t, void *, size_t *)
pi_result piSamplerGetInfo(pi_sampler, pi_sampler_info, size_t, void *, size_t *)
pi_result piextKernelSetArgSampler(pi_kernel, pi_uint32, const pi_sampler *)
pi_result piEnqueueMemBufferWriteRect(pi_queue, pi_mem, pi_bool, pi_buff_rect_offset, pi_buff_rect_offset, pi_buff_rect_region, size_t, size_t, size_t, size_t, const void *, pi_uint32, const pi_event *, pi_event *)
pi_result piProgramLink(pi_context, pi_uint32, const pi_device *, const char *, pi_uint32, const pi_program *, void(*)(pi_program, void *), void *, pi_program *)
pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet)
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList)
pi_result piextKernelCreateWithNativeHandle(pi_native_handle, pi_context, pi_program, bool, pi_kernel *)
Creates PI kernel object from a native handle.
pi_result piextEventGetNativeHandle(pi_event, pi_native_handle *)
Gets the native handle of a PI event object.
pi_result piEnqueueMemImageRead(pi_queue CommandQueue, pi_mem Image, pi_bool BlockingRead, pi_image_offset Origin, pi_image_region Region, size_t RowPitch, size_t SlicePitch, void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event)
pi_result piSamplerCreate(pi_context, const pi_sampler_properties *, pi_sampler *)
pi_result piextEventCreateWithNativeHandle(pi_native_handle, pi_context, bool, pi_event *)
Creates PI event object from a native handle.
pi_result piEventCreate(pi_context, pi_event *)
Create PI event object in a signalled/completed state.
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
pi_result piextKernelSetArgPointer(pi_kernel, pi_uint32, size_t, const void *)
Sets up pointer arguments for CL kernels.
#define CASE_PI_UNSUPPORTED(not_supported)
pi_result piextUSMEnqueueMemcpy2D(pi_queue, pi_bool, void *, size_t, const void *, size_t, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
USM 2D Memcpy API.
pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, void *HostPtr, pi_mem *RetImage)
void sycl_get_cm_buffer_params(unsigned int IndexInput, char **BaseAddr, uint32_t *Width, std::mutex **BufMtxLock)
pi_result piextQueueGetNativeHandle(pi_queue, pi_native_handle *)
Gets the native handle of a PI queue object.
pi_result piextMemGetNativeHandle(pi_mem, pi_native_handle *)
Gets the native handle of a PI mem object.
#define DIE_NO_IMPLEMENTATION
pi_result piProgramCreate(pi_context, const void *, size_t, pi_program *)
pi_result piProgramRelease(pi_program)
pi_result piextQueueCreateWithNativeHandle(pi_native_handle, pi_context, pi_device, bool, pi_queue *)
Creates PI queue object from a native handle.
pi_result piextUSMHostAlloc(void **, pi_context, pi_usm_mem_properties *, size_t, pi_uint32)
Allocates host memory accessible by the device.
static bool PrintPiTrace
pi_result piProgramRetain(pi_program)
pi_result piextUSMEnqueueFill2D(pi_queue, void *, size_t, size_t, const void *, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
USM 2D fill API.
pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, pi_uint32 *NumPlatforms)
pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices)
sycl::detail::Builder IDBuilder
void InvokeKernel(KernelInvocationContext< NDims > *ctx)
pi_result piEventSetCallback(pi_event, pi_int32, void(*)(pi_event, pi_int32, void *), void *)
pi_result piKernelSetExecInfo(pi_kernel, pi_kernel_exec_info, size_t, const void *)
API to set attributes controlling kernel execution.
pi_result piContextCreate(const pi_context_properties *Properties, pi_uint32 NumDevices, const pi_device *Devices, void(*PFnNotify)(const char *ErrInfo, const void *PrivateInfo, size_t CB, void *UserData), void *UserData, pi_context *RetContext)
#define ARG_UNUSED(x)
pi_result piProgramCompile(pi_program, pi_uint32, const pi_device *, const char *, pi_uint32, const pi_program *, const char **, void(*)(pi_program, void *), void *)
pi_result piextKernelGetNativeHandle(pi_kernel, pi_native_handle *)
Gets the native handle of a PI kernel object.
pi_result piextUSMDeviceAlloc(void **, pi_context, pi_device, pi_usm_mem_properties *, size_t, pi_uint32)
Allocates device memory.
pi_result piProgramBuild(pi_program, pi_uint32, const pi_device *, const char *, void(*)(pi_program, void *), void *)
pi_result piextUSMGetMemAllocInfo(pi_context, const void *, pi_mem_alloc_info, size_t, void *, size_t *)
API to query information about USM allocated pointers Valid Queries: PI_MEM_ALLOC_TYPE returns host/d...
pi_result piEventRetain(pi_event Event)
pi_result piextMemCreateWithNativeHandle(pi_native_handle, pi_context, bool, pi_mem *)
Creates PI mem object from a native handle.
static void setErrorMessage(const char *message, pi_result error_code)
pi_result piextDeviceGetNativeHandle(pi_device, pi_native_handle *)
Gets the native handle of a PI device object.
pi_result piKernelRelease(pi_kernel)
pi_result piDeviceRetain(pi_device Device)
pi_result piKernelGetSubGroupInfo(pi_kernel, pi_device, pi_kernel_sub_group_info, size_t, const void *, size_t, void *, size_t *)
API to query information from the sub-group from a kernel.
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle, pi_platform, pi_device *)
Creates PI device object from a native handle.
static std::mutex * PiPlatformCacheLock
#define ESIMDEmuPluginDataVersion
pi_result piContextRetain(pi_context Context)
pi_result piQueueFlush(pi_queue)
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue, pi_program, const char *, pi_bool, size_t, size_t, const void *, pi_uint32, const pi_event *, pi_event *)
Device global variable.
pi_result piextUSMEnqueueMemset2D(pi_queue, void *, size_t, int, size_t, size_t, pi_uint32, const pi_event *, pi_event *)
USM 2D Memset API.
pi_result piEventSetStatus(pi_event, pi_int32)
pi_result piextProgramCreateWithNativeHandle(pi_native_handle, pi_context, bool, pi_program *)
Creates PI program object from a native handle.
static char ESimdEmuVersionString[32]
pi_result piEnqueueMemImageCopy(pi_queue, pi_mem, pi_mem, pi_image_offset, pi_image_offset, pi_image_region, pi_uint32, const pi_event *, pi_event *)
static pi_platform PiPlatformCache
pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn)
Plugin.
Declarations for CM Emulation Plugin.
#define _PI_ESIMD_PLUGIN_VERSION_STRING
@ Device
static void invoke(pi_kernel Kernel, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize)
static sycl::range< NDims > get_range(const size_t *Array)
const sycl::range< NDims > & LocalSize
const sycl::id< NDims > & GlobalOffset
const sycl::range< NDims > & GlobalSize
KernelFunc< NDims > Func
static sycl::range< 1 > create(Gen G)
static sycl::range< 2 > create(Gen G)
static sycl::range< 3 > create(Gen G)
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:166
std::unordered_map< void *, cm_support::CmBufferSVM * > Addr2CmBufferSVM
std::mutex Addr2CmBufferSVMLock
bool checkSurfaceArgument(pi_mem_flags Flags, void *HostPtr)
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:84
cm_support::CmDevice * CmDevicePtr
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:637
cm_support::CmEvent * CmEventPtr
pi_result release()
Definition: pi_cuda.cpp:638
cm_support::CmQueue * OwnerQueue
size_t image_height
Definition: pi.h:963
size_t image_width
Definition: pi.h:962
pi_mem_type image_type
Definition: pi.h:961
pi_image_channel_type image_channel_data_type
Definition: pi.h:957
pi_image_channel_order image_channel_order
Definition: pi.h:956
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:821
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:229
std::mutex SurfaceLock
unsigned int SurfaceIndex
pi_context Context
std::unordered_map< void *, Mapping > Mappings
cm_surface_ptr_t SurfacePtr
~_pi_mem()
Definition: pi_cuda.hpp:370
std::mutex MappingsMutex
std::atomic< pi_uint32 > RefCount
A PI platform stores all known PI devices, in the CUDA plugin this is just a vector of available devi...
Definition: pi_cuda.hpp:74
std::mutex PiDeviceCacheMutex
std::string CmEmuVersion
pi_result populateDeviceCacheIfNeeded()
std::unique_ptr< _pi_device > PiDeviceCache
char PluginVersion[20]
Definition: pi.h:1980
char PiVersion[20]
Definition: pi.h:1978
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:765
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:400
cm_support::CmQueue * CmQueuePtr
Implementation of samplers for CUDA.
Definition: pi_cuda.hpp:992
cm_support::CmSurface2D * RegularImgPtr
cm_support::CmBufferUP * UPBufPtr
cm_support::CmSurface2DUP * UPImgPtr
cm_support::CmBuffer * RegularBufPtr
This struct is a record of the device binary information.
Definition: pi.h:822
bool PiPlatformCachePopulated
Definition: ur.cpp:26
zer_result_t getInfoArray(size_t array_length, size_t param_value_size, void *param_value, size_t *param_value_size_ret, const T *value)
Definition: ur.hpp:253
zer_result_t getInfo(size_t param_value_size, void *param_value, size_t *param_value_size_ret, T value)
Definition: ur.hpp:240
zer_result_t getInfo< const char * >(size_t param_value_size, void *param_value, size_t *param_value_size_ret, const char *value)
Definition: ur.hpp:276
zer_result_t getInfoImpl(size_t param_value_size, void *param_value, size_t *param_value_size_ret, T value, size_t value_size, Assign &&assign_func)
Definition: ur.hpp:219