DPC++ Runtime
Runtime libraries for oneAPI Data Parallel C++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <CL/sycl/context.hpp>
14 #include <CL/sycl/detail/util.hpp>
15 #include <CL/sycl/device.hpp>
16 #include <CL/sycl/exception.hpp>
17 #include <CL/sycl/stl.hpp>
18 #include <detail/config.hpp>
19 #include <detail/context_impl.hpp>
21 #include <detail/device_impl.hpp>
24 #include <detail/program_impl.hpp>
28 
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <cstring>
34 #include <fstream>
35 #include <memory>
36 #include <mutex>
37 #include <sstream>
38 #include <string>
39 
41 namespace sycl {
42 namespace detail {
43 
44 using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
45 
46 static constexpr int DbgProgMgr = 0;
47 
49 
50 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
51 
55  const plugin &Plugin) {
57  constexpr char SpecValue = 1;
59  Prog, ITTSpecConstId, sizeof(char), &SpecValue);
60  }
61 }
62 
63 ProgramManager &ProgramManager::getInstance() {
64  return GlobalHandler::instance().getProgramManager();
65 }
66 
67 static RT::PiProgram
68 createBinaryProgram(const ContextImplPtr Context, const device &Device,
69  const unsigned char *Data, size_t DataLen,
70  const std::vector<pi_device_binary_property> Metadata) {
71  const detail::plugin &Plugin = Context->getPlugin();
72 #ifndef _NDEBUG
73  pi_uint32 NumDevices = 0;
74  Plugin.call<PiApiKind::piContextGetInfo>(Context->getHandleRef(),
76  sizeof(NumDevices), &NumDevices,
77  /*param_value_size_ret=*/nullptr);
78  assert(NumDevices > 0 &&
79  "Only a single device is supported for AOT compilation");
80 #endif
81 
82  RT::PiProgram Program;
83  const RT::PiDevice PiDevice = getSyclObjImpl(Device)->getHandleRef();
84  pi_int32 BinaryStatus = CL_SUCCESS;
86  Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data,
87  Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
88 
89  if (BinaryStatus != CL_SUCCESS) {
90  throw runtime_error("Creating program with binary failed.", BinaryStatus);
91  }
92 
93  return Program;
94 }
95 
97  const unsigned char *Data,
98  size_t DataLen) {
99  RT::PiProgram Program = nullptr;
100  const detail::plugin &Plugin = Context->getPlugin();
101  Plugin.call<PiApiKind::piProgramCreate>(Context->getHandleRef(), Data,
102  DataLen, &Program);
103  return Program;
104 }
105 
107 ProgramManager::getDeviceImage(OSModuleHandle M, const std::string &KernelName,
108  const context &Context, const device &Device,
109  bool JITCompilationIsRequired) {
110  if (DbgProgMgr > 0)
111  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \""
112  << KernelName << "\", " << getRawSyclObjImpl(Context) << ", "
113  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
114  << ")\n";
115 
116  KernelSetId KSId = getKernelSetId(M, KernelName);
117  return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
118 }
119 
120 template <typename ExceptionT, typename RetT>
123  // any thread which will find nullptr in cache will wait until the pointer
124  // is not null anymore
125  Cache.waitUntilBuilt(*BuildResult, [BuildResult]() {
126  int State = BuildResult->State.load();
127 
128  return State == BS_Done || State == BS_Failed;
129  });
130 
131  if (BuildResult->Error.isFilledIn()) {
132  const KernelProgramCache::BuildError &Error = BuildResult->Error;
133  throw ExceptionT(Error.Msg, Error.Code);
134  }
135 
136  RetT *Result = BuildResult->Ptr.load();
137 
138  return Result;
139 }
140 
158 template <typename RetT, typename ExceptionT, typename KeyT, typename AcquireFT,
159  typename GetCacheFT, typename BuildFT>
161 getOrBuild(KernelProgramCache &KPCache, KeyT &&CacheKey, AcquireFT &&Acquire,
162  GetCacheFT &&GetCache, BuildFT &&Build) {
163  bool InsertionTookPlace;
165 
166  {
167  auto LockedCache = Acquire(KPCache);
168  auto &Cache = GetCache(LockedCache);
169  auto Inserted =
170  Cache.emplace(std::piecewise_construct, std::forward_as_tuple(CacheKey),
171  std::forward_as_tuple(nullptr, BS_InProgress));
172 
173  InsertionTookPlace = Inserted.second;
174  BuildResult = &Inserted.first->second;
175  }
176 
177  // no insertion took place, thus some other thread has already inserted smth
178  // in the cache
179  if (!InsertionTookPlace) {
180  for (;;) {
181  RetT *Result = waitUntilBuilt<ExceptionT>(KPCache, BuildResult);
182 
183  if (Result)
184  return BuildResult;
185 
186  // Previous build is failed. There was no SYCL exception though.
187  // We might try to build once more.
188  int Expected = BS_Failed;
189  int Desired = BS_InProgress;
190 
191  if (BuildResult->State.compare_exchange_strong(Expected, Desired))
192  break; // this thread is the building thread now
193  }
194  }
195 
196  // only the building thread will run this
197  try {
198  RetT *Desired = Build();
199 
200 #ifndef NDEBUG
201  RetT *Expected = nullptr;
202 
203  if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
204  // We've got a funny story here
205  assert(false && "We've build an entity that is already have been built.");
206 #else
207  BuildResult->Ptr.store(Desired);
208 #endif
209 
210  {
211  // Even if shared variable is atomic, it must be modified under the mutex
212  // in order to correctly publish the modification to the waiting thread
213  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
214  BuildResult->State.store(BS_Done);
215  }
216 
217  KPCache.notifyAllBuild(*BuildResult);
218 
219  return BuildResult;
220  } catch (const exception &Ex) {
221  BuildResult->Error.Msg = Ex.what();
222  BuildResult->Error.Code = Ex.get_cl_code();
223 
224  {
225  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
226  BuildResult->State.store(BS_Failed);
227  }
228 
229  KPCache.notifyAllBuild(*BuildResult);
230 
231  std::rethrow_exception(std::current_exception());
232  } catch (...) {
233  {
234  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
235  BuildResult->State.store(BS_Failed);
236  }
237 
238  KPCache.notifyAllBuild(*BuildResult);
239 
240  std::rethrow_exception(std::current_exception());
241  }
242 }
243 
244 // TODO replace this with a new PI API function
246  RT::PiDeviceBinaryType Format) {
247  // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported.
248  if (Format != PI_DEVICE_BINARY_TYPE_SPIRV)
249  return true;
250 
251  const backend ContextBackend =
252  detail::getSyclObjImpl(C)->getPlugin().getBackend();
253 
254  // The CUDA backend cannot use SPIR-V
255  if (ContextBackend == backend::cuda)
256  return false;
257 
258  std::vector<device> Devices = C.get_devices();
259 
260  // Program type is SPIR-V, so we need a device compiler to do JIT.
261  for (const device &D : Devices) {
262  if (!D.get_info<info::device::is_compiler_available>())
263  return false;
264  }
265 
266  // OpenCL 2.1 and greater require clCreateProgramWithIL
267  if (ContextBackend == backend::opencl) {
268  std::string ver = C.get_platform().get_info<info::platform::version>();
269  if (ver.find("OpenCL 1.0") == std::string::npos &&
270  ver.find("OpenCL 1.1") == std::string::npos &&
271  ver.find("OpenCL 1.2") == std::string::npos &&
272  ver.find("OpenCL 2.0") == std::string::npos)
273  return true;
274  }
275 
276  for (const device &D : Devices) {
277  // We need cl_khr_il_program extension to be present
278  // and we can call clCreateProgramWithILKHR using the extension
279  std::vector<std::string> Extensions =
280  D.get_info<info::device::extensions>();
281  if (Extensions.end() ==
282  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
283  return false;
284  }
285 
286  return true;
287 }
288 
289 static const char *getFormatStr(RT::PiDeviceBinaryType Format) {
290  switch (Format) {
292  return "none";
294  return "native";
296  return "SPIR-V";
298  return "LLVM IR";
299  }
300  assert(false && "Unknown device image format");
301  return "unknown";
302 }
303 
304 RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img,
305  const context &Context,
306  const device &Device) {
307  if (DbgProgMgr > 0)
308  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
309  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
310  << ")\n";
311  const pi_device_binary_struct &RawImg = Img.getRawData();
312 
313  // perform minimal sanity checks on the device image and the descriptor
314  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
315  throw runtime_error("Malformed device program image descriptor",
317  }
318  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
319  throw runtime_error("Invalid device program image: size is zero",
321  }
322  size_t ImgSize = Img.getSize();
323 
324  // TODO if the binary image is a part of the fat binary, the clang
325  // driver should have set proper format option to the
326  // clang-offload-wrapper. The fix depends on AOT compilation
327  // implementation, so will be implemented together with it.
328  // Img->Format can't be updated as it is inside of the in-memory
329  // OS module binary.
330  RT::PiDeviceBinaryType Format = Img.getFormat();
331 
332  if (Format == PI_DEVICE_BINARY_TYPE_NONE)
333  Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
334  // RT::PiDeviceBinaryType Format = Img->Format;
335  // assert(Format != PI_DEVICE_BINARY_TYPE_NONE && "Image format not set");
336 
337  if (!isDeviceBinaryTypeSupported(Context, Format))
338  throw feature_not_supported(
339  "SPIR-V online compilation is not supported in this context",
341 
342  // Get program metadata from properties
343  auto ProgMetadata = Img.getProgramMetadata();
344  std::vector<pi_device_binary_property> ProgMetadataVector{
345  ProgMetadata.begin(), ProgMetadata.end()};
346 
347  // Load the image
348  const ContextImplPtr Ctx = getSyclObjImpl(Context);
350  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
351  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart,
352  ImgSize, ProgMetadataVector);
353 
354  {
355  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
356  // associate the PI program with the image it was created for
357  NativePrograms[Res] = &Img;
358  }
359 
360  if (DbgProgMgr > 1)
361  std::cerr << "created program: " << Res
362  << "; image format: " << getFormatStr(Format) << "\n";
363 
364  return Res;
365 }
366 static void applyOptionsFromImage(std::string &CompileOpts,
367  std::string &LinkOpts,
368  const RTDeviceBinaryImage &Img) {
369  // Build options are overridden if environment variables are present.
370  // Environment variables are not changed during program lifecycle so it
371  // is reasonable to use static here to read them only once.
372  static const char *CompileOptsEnv =
374  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
375  // Update only if compile options are not overwritten by environment
376  // variable
377  if (!CompileOptsEnv) {
378  if (!CompileOpts.empty())
379  CompileOpts += " ";
380  CompileOpts += Img.getCompileOptions();
381  }
382 
383  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
384  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
385  pi_device_binary_property isEsimdImage = Img.getProperty("isEsimdImage");
386  if (isEsimdImage && pi::DeviceBinaryProperty(isEsimdImage).asUint32()) {
387  if (!CompileOpts.empty())
388  CompileOpts += " ";
389  CompileOpts += "-vc-codegen";
390  }
391 
392  // Update only if link options are not overwritten by environment variable
393  if (!LinkOptsEnv)
394  if (!LinkOpts.empty())
395  LinkOpts += " ";
396  LinkOpts += Img.getLinkOptions();
397 }
398 
399 static void applyOptionsFromEnvironment(std::string &CompileOpts,
400  std::string &LinkOpts) {
401  // Build options are overridden if environment variables are present.
402  // Environment variables are not changed during program lifecycle so it
403  // is reasonable to use static here to read them only once.
404  static const char *CompileOptsEnv =
406  if (CompileOptsEnv) {
407  CompileOpts = CompileOptsEnv;
408  }
409  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
410  if (LinkOptsEnv) {
411  LinkOpts = LinkOptsEnv;
412  }
413 }
414 
415 std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
416  const RTDeviceBinaryImage &Img, const context &Context,
417  const device &Device, const std::string &CompileAndLinkOptions,
418  SerializedObj SpecConsts) {
419  RT::PiProgram NativePrg;
420 
421  auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
422  Device, Img, SpecConsts, CompileAndLinkOptions);
423  if (BinProg.size()) {
424  // Get program metadata from properties
425  auto ProgMetadata = Img.getProgramMetadata();
426  std::vector<pi_device_binary_property> ProgMetadataVector{
427  ProgMetadata.begin(), ProgMetadata.end()};
428 
429  // TODO: Build for multiple devices once supported by program manager
430  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
431  (const unsigned char *)BinProg[0].data(),
432  BinProg[0].size(), ProgMetadataVector);
433  } else {
434  NativePrg = createPIProgram(Img, Context, Device);
435  }
436  return {NativePrg, BinProg.size()};
437 }
438 
439 RT::PiProgram ProgramManager::getBuiltPIProgram(
440  OSModuleHandle M, const ContextImplPtr &ContextImpl,
441  const DeviceImplPtr &DeviceImpl, const std::string &KernelName,
442  const program_impl *Prg, bool JITCompilationIsRequired) {
443  // TODO: Make sure that KSIds will be different for the case when the same
444  // kernel built with different options is present in the fat binary.
445  KernelSetId KSId = getKernelSetId(M, KernelName);
446 
447  using PiProgramT = KernelProgramCache::PiProgramT;
448  using ProgramCacheT = KernelProgramCache::ProgramCacheT;
449 
450  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
451 
452  auto AcquireF = [](KernelProgramCache &Cache) {
453  return Cache.acquireCachedPrograms();
454  };
455  auto GetF = [](const Locked<ProgramCacheT> &LockedCache) -> ProgramCacheT & {
456  return LockedCache.get();
457  };
458 
459  std::string CompileOpts;
460  std::string LinkOpts;
461  if (Prg) {
462  CompileOpts = Prg->get_build_options();
463  }
464 
465  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
466 
467  SerializedObj SpecConsts;
468  if (Prg)
469  Prg->stableSerializeSpecConstRegistry(SpecConsts);
470 
471  auto BuildF = [this, &M, &KSId, &ContextImpl, &DeviceImpl, Prg, &CompileOpts,
472  &LinkOpts, &JITCompilationIsRequired, SpecConsts] {
473  auto Context = createSyclObjFromImpl<context>(ContextImpl);
474  auto Device = createSyclObjFromImpl<device>(DeviceImpl);
475 
476  const RTDeviceBinaryImage &Img =
477  getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
478 
479  applyOptionsFromImage(CompileOpts, LinkOpts, Img);
480 
481  const detail::plugin &Plugin = ContextImpl->getPlugin();
482  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
483  Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
484 
485  if (!DeviceCodeWasInCache) {
486  if (Prg)
487  flushSpecConstants(*Prg, NativePrg, &Img);
488  if (Img.supportsSpecConstants())
489  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
490  }
491 
492  ProgramPtr ProgramManaged(
493  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
494 
495  // Link a fallback implementation of device libraries if they are not
496  // supported by a device compiler.
497  // Pre-compiled programs (after AOT compilation or read from persitent
498  // cache) are supposed to be already linked.
499  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
500  // no fallback device library will be linked.
501  uint32_t DeviceLibReqMask = 0;
502  if (!DeviceCodeWasInCache &&
505  DeviceLibReqMask = getDeviceLibReqMask(Img);
506 
507  ProgramPtr BuiltProgram =
508  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
509  getRawSyclObjImpl(Device)->getHandleRef(),
510  ContextImpl->getCachedLibPrograms(), DeviceLibReqMask);
511 
512  {
513  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
514  NativePrograms[BuiltProgram.get()] = &Img;
515  }
516 
517  // Save program to persistent cache if it is not there
518  if (!DeviceCodeWasInCache)
519  PersistentDeviceCodeCache::putItemToDisc(
520  Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
521  return BuiltProgram.release();
522  };
523 
524  const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
525 
526  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
527  Cache,
528  std::make_pair(std::make_pair(std::move(SpecConsts), KSId),
529  std::make_pair(PiDevice, CompileOpts + LinkOpts)),
530  AcquireF, GetF, BuildF);
531  return BuildResult->Ptr.load();
532 }
533 
534 std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
535 ProgramManager::getOrCreateKernel(OSModuleHandle M,
536  const ContextImplPtr &ContextImpl,
537  const DeviceImplPtr &DeviceImpl,
538  const std::string &KernelName,
539  const program_impl *Prg) {
540  if (DbgProgMgr > 0) {
541  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", "
542  << ContextImpl.get() << ", " << DeviceImpl.get() << ", "
543  << KernelName << ")\n";
544  }
545 
546  using PiKernelT = KernelProgramCache::PiKernelT;
547  using KernelCacheT = KernelProgramCache::KernelCacheT;
548  using KernelByNameT = KernelProgramCache::KernelByNameT;
549 
550  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
551 
552  std::string CompileOpts, LinkOpts;
553  SerializedObj SpecConsts;
554  if (Prg) {
555  CompileOpts = Prg->get_build_options();
556  Prg->stableSerializeSpecConstRegistry(SpecConsts);
557  }
558  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
559  const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
560 
561  auto key = std::make_tuple(std::move(SpecConsts), M, PiDevice,
562  CompileOpts + LinkOpts, KernelName);
563  auto ret_tuple = Cache.tryToGetKernelFast(key);
564  if (std::get<0>(ret_tuple))
565  return ret_tuple;
566 
567  RT::PiProgram Program =
568  getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
569 
570  auto AcquireF = [](KernelProgramCache &Cache) {
571  return Cache.acquireKernelsPerProgramCache();
572  };
573  auto GetF =
574  [&Program](const Locked<KernelCacheT> &LockedCache) -> KernelByNameT & {
575  return LockedCache.get()[Program];
576  };
577  auto BuildF = [&Program, &KernelName, &ContextImpl] {
578  PiKernelT *Result = nullptr;
579 
580  // TODO need some user-friendly error/exception
581  // instead of currently obscure one
582  const detail::plugin &Plugin = ContextImpl->getPlugin();
583  Plugin.call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
584  &Result);
585 
586  // Some PI Plugins (like OpenCL) require this call to enable USM
587  // For others, PI will turn this into a NOP.
589  sizeof(pi_bool), &PI_TRUE);
590 
591  return Result;
592  };
593 
594  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
595  Cache, KernelName, AcquireF, GetF, BuildF);
596  auto ret_val = std::make_tuple(BuildResult->Ptr.load(),
597  &(BuildResult->MBuildResultMutex), Program);
598  Cache.saveKernel(key, ret_val);
599  return ret_val;
600 }
601 
603 ProgramManager::getPiProgramFromPiKernel(RT::PiKernel Kernel,
604  const ContextImplPtr Context) {
605  RT::PiProgram Program;
606  const detail::plugin &Plugin = Context->getPlugin();
608  Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr);
609  return Program;
610 }
611 
612 std::string ProgramManager::getProgramBuildLog(const RT::PiProgram &Program,
613  const ContextImplPtr Context) {
614  size_t PIDevicesSize = 0;
615  const detail::plugin &Plugin = Context->getPlugin();
617  nullptr, &PIDevicesSize);
618  std::vector<RT::PiDevice> PIDevices(PIDevicesSize / sizeof(RT::PiDevice));
620  PIDevicesSize, PIDevices.data(),
621  nullptr);
622  std::string Log = "The program was built for " +
623  std::to_string(PIDevices.size()) + " devices";
624  for (RT::PiDevice &Device : PIDevices) {
625  std::string DeviceBuildInfoString;
626  size_t DeviceBuildInfoStrSize = 0;
628  Program, Device, CL_PROGRAM_BUILD_LOG, 0, nullptr,
629  &DeviceBuildInfoStrSize);
630  if (DeviceBuildInfoStrSize > 0) {
631  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
633  Program, Device, CL_PROGRAM_BUILD_LOG, DeviceBuildInfoStrSize,
634  DeviceBuildInfo.data(), nullptr);
635  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
636  }
637 
638  std::string DeviceNameString;
639  size_t DeviceNameStrSize = 0;
641  nullptr, &DeviceNameStrSize);
642  if (DeviceNameStrSize > 0) {
643  std::vector<char> DeviceName(DeviceNameStrSize);
645  DeviceNameStrSize,
646  DeviceName.data(), nullptr);
647  DeviceNameString = std::string(DeviceName.data());
648  }
649  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
650  DeviceBuildInfoString;
651  }
652  return Log;
653 }
654 
655 // TODO device libraries may use scpecialization constants, manifest files, etc.
656 // To support that they need to be delivered in a different container - so that
657 // pi_device_binary_struct can be created for each of them.
658 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
659  RT::PiProgram &Prog) {
660  std::string LibSyclDir = OSUtil::getCurrentDSODir();
661  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
662  std::ifstream::in | std::ifstream::binary);
663  if (!File.good()) {
664  return false;
665  }
666 
667  File.seekg(0, std::ios::end);
668  size_t FileSize = File.tellg();
669  File.seekg(0, std::ios::beg);
670  std::vector<char> FileContent(FileSize);
671  File.read(&FileContent[0], FileSize);
672  File.close();
673 
674  Prog =
675  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
676  return Prog != nullptr;
677 }
678 
679 static const char *getDeviceLibFilename(DeviceLibExt Extension) {
680  switch (Extension) {
681  case DeviceLibExt::cl_intel_devicelib_assert:
682  return "libsycl-fallback-cassert.spv";
683  case DeviceLibExt::cl_intel_devicelib_math:
684  return "libsycl-fallback-cmath.spv";
685  case DeviceLibExt::cl_intel_devicelib_math_fp64:
686  return "libsycl-fallback-cmath-fp64.spv";
687  case DeviceLibExt::cl_intel_devicelib_complex:
688  return "libsycl-fallback-complex.spv";
689  case DeviceLibExt::cl_intel_devicelib_complex_fp64:
690  return "libsycl-fallback-complex-fp64.spv";
691  case DeviceLibExt::cl_intel_devicelib_cstring:
692  return "libsycl-fallback-cstring.spv";
693  }
694  throw compile_program_error("Unhandled (new?) device library extension",
696 }
697 
698 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
699  switch (Extension) {
700  case DeviceLibExt::cl_intel_devicelib_assert:
701  return "cl_intel_devicelib_assert";
702  case DeviceLibExt::cl_intel_devicelib_math:
703  return "cl_intel_devicelib_math";
704  case DeviceLibExt::cl_intel_devicelib_math_fp64:
705  return "cl_intel_devicelib_math_fp64";
706  case DeviceLibExt::cl_intel_devicelib_complex:
707  return "cl_intel_devicelib_complex";
708  case DeviceLibExt::cl_intel_devicelib_complex_fp64:
709  return "cl_intel_devicelib_complex_fp64";
710  case DeviceLibExt::cl_intel_devicelib_cstring:
711  return "cl_intel_devicelib_cstring";
712  }
713  throw compile_program_error("Unhandled (new?) device library extension",
715 }
716 
718  const ContextImplPtr Context, DeviceLibExt Extension,
719  const RT::PiDevice &Device,
720  std::map<std::pair<DeviceLibExt, RT::PiDevice>, RT::PiProgram>
721  &CachedLibPrograms) {
722 
723  const char *LibFileName = getDeviceLibFilename(Extension);
724  auto CacheResult = CachedLibPrograms.emplace(
725  std::make_pair(std::make_pair(Extension, Device), nullptr));
726  bool Cached = !CacheResult.second;
727  auto LibProgIt = CacheResult.first;
728  RT::PiProgram &LibProg = LibProgIt->second;
729 
730  if (Cached)
731  return LibProg;
732 
733  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
734  CachedLibPrograms.erase(LibProgIt);
735  throw compile_program_error(std::string("Failed to load ") + LibFileName,
737  }
738 
739  const detail::plugin &Plugin = Context->getPlugin();
740  // TODO no spec constants are used in the std libraries, support in the future
742  LibProg,
743  /*num devices = */ 1, &Device,
744  // Do not use compile options for library programs: it is not clear
745  // if user options (image options) are supposed to be applied to
746  // library program as well, and what actually happens to a SPIR-V
747  // program if we apply them.
748  "", 0, nullptr, nullptr, nullptr, nullptr);
749  if (Error != PI_SUCCESS) {
750  CachedLibPrograms.erase(LibProgIt);
751  throw compile_program_error(
752  ProgramManager::getProgramBuildLog(LibProg, Context), Error);
753  }
754 
755  return LibProg;
756 }
757 
758 ProgramManager::ProgramManager() {
759  const char *SpvFile = std::getenv(UseSpvEnv);
760  // If a SPIR-V file is specified with an environment variable,
761  // register the corresponding image
762  if (SpvFile) {
763  m_UseSpvFile = true;
764  // The env var requests that the program is loaded from a SPIR-V file on
765  // disk
766  std::ifstream File(SpvFile, std::ios::binary);
767 
768  if (!File.is_open())
769  throw runtime_error(std::string("Can't open file specified via ") +
770  UseSpvEnv + ": " + SpvFile,
772  File.seekg(0, std::ios::end);
773  size_t Size = File.tellg();
774  std::unique_ptr<char[]> Data(new char[Size]);
775  File.seekg(0);
776  File.read(Data.get(), Size);
777  File.close();
778  if (!File.good())
779  throw runtime_error(std::string("read from ") + SpvFile +
780  std::string(" failed"),
782  auto ImgPtr = make_unique_ptr<DynRTDeviceBinaryImage>(
783  std::move(Data), Size, OSUtil::DummyModuleHandle);
784 
785  if (DbgProgMgr > 0) {
786  std::cerr << "loaded device image binary from " << SpvFile << "\n";
787  std::cerr << "format: " << getFormatStr(ImgPtr->getFormat()) << "\n";
788  }
789  // No need for a mutex here since all access to these private fields is
790  // blocked until the construction of the ProgramManager singleton is
791  // finished.
792  m_DeviceImages[SpvFileKSId].reset(
793  new std::vector<RTDeviceBinaryImageUPtr>());
794  m_DeviceImages[SpvFileKSId]->push_back(std::move(ImgPtr));
795  }
796 }
797 
799 ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId,
800  const context &Context, const device &Device,
801  bool JITCompilationIsRequired) {
802  if (DbgProgMgr > 0) {
803  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" << KSId
804  << "\", " << getRawSyclObjImpl(Context) << ", "
805  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
806  << ")\n";
807 
808  std::cerr << "available device images:\n";
809  debugPrintBinaryImages();
810  }
811  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
812  std::vector<RTDeviceBinaryImageUPtr> &Imgs = *m_DeviceImages[KSId];
813  const ContextImplPtr Ctx = getSyclObjImpl(Context);
814  pi_uint32 ImgInd = 0;
815  RTDeviceBinaryImage *Img = nullptr;
816 
817  // TODO: There may be cases with cl::sycl::program class usage in source code
818  // that will result in a multi-device context. This case needs to be handled
819  // here or at the program_impl class level
820 
821  // Ask the native runtime under the given context to choose the device image
822  // it prefers.
823  std::vector<pi_device_binary> RawImgs(Imgs.size());
824  for (unsigned I = 0; I < Imgs.size(); I++)
825  RawImgs[I] = const_cast<pi_device_binary>(&Imgs[I]->getRawData());
826 
827  Ctx->getPlugin().call<PiApiKind::piextDeviceSelectBinary>(
828  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
829  (cl_uint)RawImgs.size(), &ImgInd);
830 
831  if (JITCompilationIsRequired) {
832  // If the image is already compiled with AOT, throw an exception.
833  const pi_device_binary_struct &RawImg = Imgs[ImgInd]->getRawData();
834  if ((strcmp(RawImg.DeviceTargetSpec,
836  (strcmp(RawImg.DeviceTargetSpec,
838  (strcmp(RawImg.DeviceTargetSpec,
840  throw feature_not_supported("Recompiling AOT image is not supported",
842  }
843  }
844 
845  Img = Imgs[ImgInd].get();
846 
847  if (DbgProgMgr > 0) {
848  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
849  Img->print();
850  }
851 
852  if (std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile)
853  dumpImage(*Img, KSId);
854  return *Img;
855 }
856 
857 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
858  uint32_t Mask =
859  0x1 << (static_cast<uint32_t>(Ext) -
860  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
861  return ((DeviceLibReqMask & Mask) == Mask);
862 }
863 
864 static std::vector<RT::PiProgram> getDeviceLibPrograms(
865  const ContextImplPtr Context, const RT::PiDevice &Device,
866  std::map<std::pair<DeviceLibExt, RT::PiDevice>, RT::PiProgram>
867  &CachedLibPrograms,
868  uint32_t DeviceLibReqMask) {
869  std::vector<RT::PiProgram> Programs;
870 
871  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
872  {DeviceLibExt::cl_intel_devicelib_assert,
873  /* is fallback loaded? */ false},
874  {DeviceLibExt::cl_intel_devicelib_math, false},
875  {DeviceLibExt::cl_intel_devicelib_math_fp64, false},
876  {DeviceLibExt::cl_intel_devicelib_complex, false},
877  {DeviceLibExt::cl_intel_devicelib_complex_fp64, false},
878  {DeviceLibExt::cl_intel_devicelib_cstring, false}};
879 
880  // Disable all devicelib extensions requiring fp64 support if at least
881  // one underlying device doesn't support cl_khr_fp64.
882  std::string DevExtList =
884  Device, Context->getPlugin());
885  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
886 
887  // Load a fallback library for an extension if the device does not
888  // support it.
889  for (auto &Pair : RequiredDeviceLibExt) {
890  DeviceLibExt Ext = Pair.first;
891  bool &FallbackIsLoaded = Pair.second;
892 
893  if (FallbackIsLoaded) {
894  continue;
895  }
896 
897  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
898  continue;
899  }
900  if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 ||
901  Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) &&
902  !fp64Support) {
903  continue;
904  }
905 
906  const char *ExtStr = getDeviceLibExtensionStr(Ext);
907 
908  bool InhibitNativeImpl = false;
909  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
910  InhibitNativeImpl = strstr(Env, ExtStr) != nullptr;
911  }
912 
913  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr);
914 
915  if (!DeviceSupports || InhibitNativeImpl) {
916  Programs.push_back(
917  loadDeviceLibFallback(Context, Ext, Device, CachedLibPrograms));
918  FallbackIsLoaded = true;
919  }
920  }
921  return Programs;
922 }
923 
924 ProgramManager::ProgramPtr ProgramManager::build(
925  ProgramPtr Program, const ContextImplPtr Context,
926  const std::string &CompileOptions, const std::string &LinkOptions,
927  const RT::PiDevice &Device,
928  std::map<std::pair<DeviceLibExt, RT::PiDevice>, RT::PiProgram>
929  &CachedLibPrograms,
930  uint32_t DeviceLibReqMask) {
931 
932  if (DbgProgMgr > 0) {
933  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
934  << CompileOptions << ", " << LinkOptions << ", ... " << Device
935  << ")\n";
936  }
937 
938  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
939 
940  // TODO: Currently, online linking isn't implemented yet on Level Zero.
941  // To enable device libraries and unify the behaviors on all backends,
942  // online linking is disabled temporarily, all fallback device libraries
943  // will be linked offline. When Level Zero supports online linking, we need
944  // to remove the line of code below and switch back to online linking.
945  LinkDeviceLibs = false;
946 
947  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
948  // We do not link with other device libraries, because it may fail
949  // due to unrecognized SPIR-V format of those libraries.
950  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
951  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
952  LinkDeviceLibs = false;
953 
954  std::vector<RT::PiProgram> LinkPrograms;
955  if (LinkDeviceLibs) {
956  LinkPrograms = getDeviceLibPrograms(Context, Device, CachedLibPrograms,
957  DeviceLibReqMask);
958  }
959 
960  const detail::plugin &Plugin = Context->getPlugin();
961  if (LinkPrograms.empty()) {
962  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramBuild>(
963  Program.get(), /*num devices =*/1, &Device, CompileOptions.c_str(),
964  nullptr, nullptr);
965  if (Error != PI_SUCCESS)
966  throw compile_program_error(getProgramBuildLog(Program.get(), Context),
967  Error);
968  return Program;
969  }
970 
971  // Include the main program and compile/link everything together
972  Plugin.call<PiApiKind::piProgramCompile>(Program.get(), /*num devices =*/1,
973  &Device, CompileOptions.c_str(), 0,
974  nullptr, nullptr, nullptr, nullptr);
975  LinkPrograms.push_back(Program.get());
976 
977  RT::PiProgram LinkedProg = nullptr;
978  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramLink>(
979  Context->getHandleRef(), /*num devices =*/1, &Device, LinkOptions.c_str(),
980  LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg);
981 
982  // Link program call returns a new program object if all parameters are valid,
983  // or NULL otherwise. Release the original (user) program.
984  Program.reset(LinkedProg);
985  if (Error != PI_SUCCESS) {
986  if (LinkedProg) {
987  // A non-trivial error occurred during linkage: get a build log, release
988  // an incomplete (but valid) LinkedProg, and throw.
989  throw compile_program_error(getProgramBuildLog(LinkedProg, Context),
990  Error);
991  }
992  Plugin.checkPiResult(Error);
993  }
994  return Program;
995 }
996 
997 static ProgramManager::KernelArgMask
999  const int NBytesForSize = 8;
1000  const int NBitsInElement = 8;
1001  std::uint64_t SizeInBits = 0;
1002  for (int I = 0; I < NBytesForSize; ++I)
1003  SizeInBits |= static_cast<std::uint64_t>(Bytes[I]) << I * NBitsInElement;
1004 
1006  for (std::uint64_t I = 0; I < SizeInBits; ++I) {
1007  std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
1008  Result.push_back(Byte & (1 << (I % NBitsInElement)));
1009  }
1010 
1011  return Result;
1012 }
1013 
1014 void ProgramManager::cacheKernelUsesAssertInfo(OSModuleHandle M,
1015  RTDeviceBinaryImage &Img) {
1016  const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange =
1017  Img.getAssertUsed();
1018  if (AssertUsedRange.isAvailable())
1019  for (const auto &Prop : AssertUsedRange) {
1020  KernelNameWithOSModule Key{Prop->Name, M};
1021  m_KernelUsesAssert.insert(Key);
1022  }
1023 }
1024 
1025 bool ProgramManager::kernelUsesAssert(OSModuleHandle M,
1026  const std::string &KernelName) const {
1027  KernelNameWithOSModule Key{KernelName, M};
1028  return m_KernelUsesAssert.find(Key) != m_KernelUsesAssert.end();
1029 }
1030 
1031 void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
1032  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1033 
1034  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1035  pi_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1036  OSModuleHandle M = OSUtil::getOSModuleHandle(RawImg);
1037  const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
1038  const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
1039  auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
1040 
1041  // Fill the kernel argument mask map
1042  const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
1043  Img->getKernelParamOptInfo();
1044  if (KPOIRange.isAvailable()) {
1045  KernelNameToArgMaskMap &ArgMaskMap =
1046  m_EliminatedKernelArgMasks[Img.get()];
1047  for (const auto &Info : KPOIRange)
1048  ArgMaskMap[Info->Name] =
1049  createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
1050  }
1051  // Use the entry information if it's available
1052  if (EntriesB != EntriesE) {
1053  // The kernel sets for any pair of images are either disjoint or
1054  // identical, look up the kernel set using the first kernel name...
1055  StrToKSIdMap &KSIdMap = m_KernelSets[M];
1056  auto KSIdIt = KSIdMap.find(EntriesB->name);
1057  if (KSIdIt != KSIdMap.end()) {
1058  for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE;
1059  ++EntriesIt)
1060  assert(KSIdMap[EntriesIt->name] == KSIdIt->second &&
1061  "Kernel sets are not disjoint");
1062  auto &Imgs = m_DeviceImages[KSIdIt->second];
1063  assert(Imgs && "Device image vector should have been already created");
1064 
1065  cacheKernelUsesAssertInfo(M, *Img);
1066 
1067  Imgs->push_back(std::move(Img));
1068  continue;
1069  }
1070  // ... or create the set first if it hasn't been
1071  KernelSetId KSId = getNextKernelSetId();
1072  {
1073  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1074 
1075  // Register all exported symbols
1076  auto ExportedSymbols = Img->getExportedSymbols();
1077  for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
1078  m_ExportedSymbols.insert(ExportedSymbol->Name);
1079 
1080  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1081  ++EntriesIt) {
1082  auto Result = KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
1083  (void)Result;
1084  assert(Result.second && "Kernel sets are not disjoint");
1085 
1086  // Skip creating unique kernel ID if it is a service kernel.
1087  // SYCL service kernels are identified by having
1088  // __sycl_service_kernel__ in the mangled name, primarily as part of
1089  // the namespace of the name type.
1090  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1091  m_ServiceKernels.insert(EntriesIt->name);
1092  continue;
1093  }
1094 
1095  // Skip creating unique kernel ID if it is an exported device
1096  // function. Exported device functions appear in the offload entries
1097  // among kernels, but are identifiable by being listed in properties.
1098  if (m_ExportedSymbols.find(EntriesIt->name) !=
1099  m_ExportedSymbols.end())
1100  continue;
1101 
1102  // ... and create a unique kernel ID for the entry
1103  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1104  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1105  sycl::kernel_id KernelID =
1106  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1107  m_KernelIDs.insert(
1108  std::make_pair(EntriesIt->name, std::move(KernelID)));
1109  }
1110  }
1111  m_DeviceImages[KSId].reset(new std::vector<RTDeviceBinaryImageUPtr>());
1112 
1113  cacheKernelUsesAssertInfo(M, *Img);
1114 
1115  m_DeviceImages[KSId]->push_back(std::move(Img));
1116  continue;
1117  }
1118  // Otherwise assume that the image contains all kernels associated with the
1119  // module
1120  KernelSetId &KSId = m_OSModuleKernelSets[M];
1121  if (KSId == 0)
1122  KSId = getNextKernelSetId();
1123 
1124  auto &Imgs = m_DeviceImages[KSId];
1125  if (!Imgs)
1126  Imgs.reset(new std::vector<RTDeviceBinaryImageUPtr>());
1127 
1128  cacheKernelUsesAssertInfo(M, *Img);
1129 
1130  Imgs->push_back(std::move(Img));
1131  }
1132 }
1133 
1134 void ProgramManager::debugPrintBinaryImages() const {
1135  for (const auto &ImgVecIt : m_DeviceImages) {
1136  std::cerr << " ++++++ Kernel set: " << ImgVecIt.first << "\n";
1137  for (const auto &Img : *ImgVecIt.second)
1138  Img.get()->print();
1139  }
1140 }
1141 
1142 KernelSetId ProgramManager::getNextKernelSetId() const {
1143  // No need for atomic, should be guarded by the caller
1144  static KernelSetId Result = LastKSId;
1145  return ++Result;
1146 }
1147 
1149 ProgramManager::getKernelSetId(OSModuleHandle M,
1150  const std::string &KernelName) const {
1151  // If the env var instructs to use image from a file,
1152  // return the kernel set associated with it
1153  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1154  return SpvFileKSId;
1155  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1156  auto KSIdMapIt = m_KernelSets.find(M);
1157  if (KSIdMapIt != m_KernelSets.end()) {
1158  const StrToKSIdMap &KSIdMap = KSIdMapIt->second;
1159  auto KSIdIt = KSIdMap.find(KernelName);
1160  // If the kernel has been assigned to a kernel set, return it
1161  if (KSIdIt != KSIdMap.end())
1162  return KSIdIt->second;
1163  }
1164  // If no kernel set was found check if there is a kernel set containing
1165  // all kernels in the given module
1166  auto ModuleKSIdIt = m_OSModuleKernelSets.find(M);
1167  if (ModuleKSIdIt != m_OSModuleKernelSets.end())
1168  return ModuleKSIdIt->second;
1169 
1170  throw runtime_error("No kernel named " + KernelName + " was found",
1172 }
1173 
1174 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img,
1175  KernelSetId KSId) const {
1176  std::string Fname("sycl_");
1177  const pi_device_binary_struct &RawImg = Img.getRawData();
1178  Fname += RawImg.DeviceTargetSpec;
1179  Fname += std::to_string(KSId);
1180  std::string Ext;
1181 
1182  RT::PiDeviceBinaryType Format = Img.getFormat();
1183  if (Format == PI_DEVICE_BINARY_TYPE_SPIRV)
1184  Ext = ".spv";
1185  else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1186  Ext = ".bc";
1187  else
1188  Ext = ".bin";
1189  Fname += Ext;
1190 
1191  std::ofstream F(Fname, std::ios::binary);
1192 
1193  if (!F.is_open()) {
1194  throw runtime_error("Can not write " + Fname, PI_ERROR_UNKNOWN);
1195  }
1196  Img.dump(F);
1197  F.close();
1198 }
1199 
1200 void ProgramManager::flushSpecConstants(const program_impl &Prg,
1201  RT::PiProgram NativePrg,
1202  const RTDeviceBinaryImage *Img) {
1203  if (DbgProgMgr > 2) {
1204  std::cerr << ">>> ProgramManager::flushSpecConstants(" << Prg.get()
1205  << ",...)\n";
1206  }
1207  if (!Prg.hasSetSpecConstants())
1208  return; // nothing to do
1209  pi::PiProgram PrgHandle = Prg.getHandleRef();
1210  // program_impl can't correspond to two different native programs
1211  assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1212  NativePrg = NativePrg ? NativePrg : PrgHandle;
1213 
1214  if (!Img) {
1215  // caller hasn't provided the image object - find it
1216  { // make sure NativePrograms map access is synchronized
1217  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1218  auto It = NativePrograms.find(NativePrg);
1219  if (It == NativePrograms.end())
1221  "spec constant is set in a program w/o a binary image",
1223  Img = It->second;
1224  }
1225  if (!Img->supportsSpecConstants()) {
1226  if (DbgProgMgr > 0)
1227  std::cerr << ">>> ProgramManager::flushSpecConstants: binary image "
1228  << &Img->getRawData() << " doesn't support spec constants\n";
1229  // This device binary image does not support runtime setting of
1230  // specialization constants; compiler must have generated default values.
1231  // NOTE: Can't throw here, as it would always take place with AOT
1232  //-compiled code. New Khronos 2020 spec should fix this inconsistency.
1233  return;
1234  }
1235  }
1236  Prg.flush_spec_constants(*Img, NativePrg);
1237 }
1238 
1239 // If the kernel is loaded from spv file, it may not include DeviceLib require
1240 // mask, sycl runtime won't know which fallback device libraries are needed. In
1241 // such case, the safest way is to load all fallback device libraries.
1242 uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
1243  const pi::DeviceBinaryImage::PropertyRange &DLMRange =
1244  Img.getDeviceLibReqMask();
1245  if (DLMRange.isAvailable())
1246  return pi::DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1247  else
1248  return 0xFFFFFFFF;
1249 }
1250 
1251 // TODO consider another approach with storing the masks in the integration
1252 // header instead.
1253 ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
1254  OSModuleHandle M, pi::PiProgram NativePrg, const std::string &KernelName) {
1255  // If instructed to use a spv file, assume no eliminated arguments.
1256  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1257  return {};
1258 
1259  // Bail out if there are no eliminated kernel arg masks in our images
1260  if (m_EliminatedKernelArgMasks.empty())
1261  return {};
1262 
1263  {
1264  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1265  auto ImgIt = NativePrograms.find(NativePrg);
1266  if (ImgIt != NativePrograms.end()) {
1267  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1268  if (MapIt != m_EliminatedKernelArgMasks.end())
1269  return MapIt->second[KernelName];
1270  return {};
1271  }
1272  }
1273 
1274  // If the program was not cached iterate over all available images looking for
1275  // the requested kernel
1276  for (auto &Elem : m_EliminatedKernelArgMasks) {
1277  auto ArgMask = Elem.second.find(KernelName);
1278  if (ArgMask != Elem.second.end())
1279  return ArgMask->second;
1280  }
1281 
1282  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1283  return {};
1284 }
1285 
1287  auto IsAOTBinary = [](const char *Format) {
1288  return (
1289  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1290  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1291  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1292  };
1293 
1294  // There are only two initial states so far - SPIRV which needs to be compiled
1295  // and linked and fully compiled(AOTed) binary
1296 
1297  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1298 
1299  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1300 }
1301 
1303  const device &Dev) {
1304  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1306  auto &Plugin = DeviceImpl->getPlugin();
1307 
1308  const RT::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1309 
1310  // Call piextDeviceSelectBinary with only one image to check if an image is
1311  // compatible with implementation. The function returns invalid index if no
1312  // device images are compatible.
1313  pi_uint32 SuitableImageID = std::numeric_limits<pi_uint32>::max();
1314  pi_device_binary DevBin =
1315  const_cast<pi_device_binary>(&BinImage->getRawData());
1317  PIDeviceHandle, &DevBin,
1318  /*num bin images = */ (cl_uint)1, &SuitableImageID);
1319  return (0 == SuitableImageID);
1320 }
1321 
1322 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1323  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1324 
1325  auto KernelID = m_KernelIDs.find(KernelName);
1326  if (KernelID == m_KernelIDs.end())
1327  throw runtime_error("No kernel found with the specified name",
1329 
1330  return KernelID->second;
1331 }
1332 
1333 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1334  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1335 
1336  std::vector<sycl::kernel_id> AllKernelIDs;
1337  AllKernelIDs.reserve(m_KernelIDs.size());
1338  for (std::pair<std::string, kernel_id> KernelID : m_KernelIDs) {
1339  AllKernelIDs.push_back(KernelID.second);
1340  }
1341  return AllKernelIDs;
1342 }
1343 
1344 std::vector<device_image_plain>
1345 ProgramManager::getSYCLDeviceImagesWithCompatibleState(
1346  const context &Ctx, const std::vector<device> &Devs,
1347  bundle_state TargetState) {
1348 
1349  // Collect raw device images
1350  std::vector<RTDeviceBinaryImage *> BinImages;
1351  {
1352  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1353  for (auto &ImagesSets : m_DeviceImages) {
1354  auto &ImagesUPtrs = *ImagesSets.second.get();
1355  for (auto &ImageUPtr : ImagesUPtrs) {
1356  const RTDeviceBinaryImage *BinImage = ImageUPtr.get();
1357  const bundle_state ImgState = getBinImageState(BinImage);
1358 
1359  // Ignore images with incompatible state. Image is considered compatible
1360  // with a target state if an image is already in the target state or can
1361  // be brought to target state by compiling/linking/building.
1362  //
1363  // Example: an image in "executable" state is not compatible with
1364  // "input" target state - there is no operation to convert the image it
1365  // to "input" state. An image in "input" state is compatible with
1366  // "executable" target state because it can be built to get into
1367  // "executable" state.
1368  if (ImgState > TargetState)
1369  continue;
1370 
1371  BinImages.push_back(ImageUPtr.get());
1372  }
1373  }
1374  }
1375  // TODO: Add a diagnostic on multiple device images with conflicting kernel
1376  // names, and remove OSModuleHandle usage, as conflicting kernel names will be
1377  // an error.
1378 
1379  // TODO: Cache device_image objects
1380  // Create SYCL device image from those that have compatible state and at least
1381  // one device
1382  std::vector<device_image_plain> SYCLDeviceImages;
1383  for (RTDeviceBinaryImage *BinImage : BinImages) {
1384  const bundle_state ImgState = getBinImageState(BinImage);
1385 
1386  for (const sycl::device &Dev : Devs) {
1387  if (!compatibleWithDevice(BinImage, Dev))
1388  continue;
1389 
1390  std::vector<sycl::kernel_id> KernelIDs;
1391  // Collect kernel names for the image
1392  pi_device_binary DevBin =
1393  const_cast<pi_device_binary>(&BinImage->getRawData());
1394  {
1395  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1396  for (_pi_offload_entry EntriesIt = DevBin->EntriesBegin;
1397  EntriesIt != DevBin->EntriesEnd; ++EntriesIt) {
1398  auto KernelID = m_KernelIDs.find(EntriesIt->name);
1399 
1400  if (KernelID == m_KernelIDs.end()) {
1401  // Service kernels and exported symbols do not have kernel IDs
1402  assert((m_ServiceKernels.find(EntriesIt->name) !=
1403  m_ServiceKernels.end() ||
1404  m_ExportedSymbols.find(EntriesIt->name) !=
1405  m_ExportedSymbols.end()) &&
1406  "Kernel ID in device binary missing from cache");
1407  continue;
1408  }
1409 
1410  KernelIDs.push_back(KernelID->second);
1411  }
1412  }
1413 
1414  // If the image does not contain any non-service kernels we can skip it.
1415  if (KernelIDs.empty())
1416  continue;
1417 
1418  // device_image_impl expects kernel ids to be sorted for fast search
1419  std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{});
1420 
1421  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1422  BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
1423 
1424  SYCLDeviceImages.push_back(
1425  createSyclObjFromImpl<device_image_plain>(Impl));
1426  break;
1427  }
1428  }
1429 
1430  return SYCLDeviceImages;
1431 }
1432 
1433 void ProgramManager::bringSYCLDeviceImagesToState(
1434  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
1435 
1436  for (device_image_plain &DevImage : DeviceImages) {
1437  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
1438 
1439  switch (TargetState) {
1440  case bundle_state::input:
1441  // Do nothing since there is no state which can be upgraded to the input.
1442  assert(DevImageState == bundle_state::input);
1443  break;
1444  case bundle_state::object:
1445  if (DevImageState == bundle_state::input) {
1446  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1447  /*PropList=*/{});
1448  break;
1449  }
1450  // Device image is expected to be object state then.
1451  assert(DevImageState == bundle_state::object);
1452  break;
1453  case bundle_state::executable: {
1454  switch (DevImageState) {
1455  case bundle_state::input:
1456  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1457  /*PropList=*/{});
1458  break;
1459  case bundle_state::object: {
1460  std::vector<device_image_plain> LinkedDevImages =
1461  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
1462  /*PropList=*/{});
1463  // Since only one device image is passed here one output device image is
1464  // expected
1465  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
1466  DevImage = LinkedDevImages[0];
1467  break;
1468  }
1469  case bundle_state::executable:
1470  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1471  /*PropList=*/{});
1472  break;
1473  }
1474  break;
1475  }
1476  }
1477  }
1478 }
1479 
1480 std::vector<device_image_plain>
1481 ProgramManager::getSYCLDeviceImages(const context &Ctx,
1482  const std::vector<device> &Devs,
1483  bundle_state TargetState) {
1484  // Collect device images with compatible state
1485  std::vector<device_image_plain> DeviceImages =
1486  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1487  // Brind device images with compatible state to desired state
1488  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1489  return DeviceImages;
1490 }
1491 
1492 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1493  const context &Ctx, const std::vector<device> &Devs,
1494  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
1495  // Collect device images with compatible state
1496  std::vector<device_image_plain> DeviceImages =
1497  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1498 
1499  // Filter out images that are rejected by Selector
1500  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1501  [&Selector](const device_image_plain &Image) {
1502  return !Selector(getSyclObjImpl(Image));
1503  });
1504  DeviceImages.erase(It, DeviceImages.end());
1505 
1506  // The spec says that the function should not call online compiler or linker
1507  // to translate device images into target state
1508  return DeviceImages;
1509 }
1510 
1511 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1512  const context &Ctx, const std::vector<device> &Devs,
1513  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1514  // Collect device images with compatible state
1515  std::vector<device_image_plain> DeviceImages =
1516  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1517 
1518  // Filter out images that have no kernel_ids specified
1519  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1520  [&KernelIDs](const device_image_plain &Image) {
1521  return std::none_of(
1522  KernelIDs.begin(), KernelIDs.end(),
1523  [&Image](const sycl::kernel_id &KernelID) {
1524  return Image.has_kernel(KernelID);
1525  });
1526  });
1527 
1528  DeviceImages.erase(It, DeviceImages.end());
1529 
1530  // Brind device images with compatible state to desired state
1531  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1532  return DeviceImages;
1533 }
1534 
1537  const std::vector<device> &Devs,
1538  const property_list &) {
1539 
1540  // TODO: Extract compile options from property list once the Spec clarifies
1541  // how they can be passed.
1542 
1543  // TODO: Probably we could have cached compiled device images.
1544  const std::shared_ptr<device_image_impl> &InputImpl =
1545  getSyclObjImpl(DeviceImage);
1546 
1547  const detail::plugin &Plugin =
1548  getSyclObjImpl(InputImpl->get_context())->getPlugin();
1549 
1550  // TODO: Add support for creating non-SPIRV programs from multiple devices.
1551  if (InputImpl->get_bin_image_ref()->getFormat() !=
1553  Devs.size() > 1)
1554  sycl::runtime_error(
1555  "Creating a program from AOT binary for multiple device is not "
1556  "supported",
1558 
1559  // Device is not used when creating program from SPIRV, so passing only one
1560  // device is OK.
1561  RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
1562  InputImpl->get_context(), Devs[0]);
1563 
1564  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1565  enableITTAnnotationsIfNeeded(Prog, Plugin);
1566 
1567  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
1568  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
1569  bundle_state::object, InputImpl->get_kernel_ids_ref(), Prog,
1570  InputImpl->get_spec_const_data_ref(),
1571  InputImpl->get_spec_const_blob_ref());
1572 
1573  std::vector<pi_device> PIDevices;
1574  PIDevices.reserve(Devs.size());
1575  for (const device &Dev : Devs)
1576  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
1577 
1578  // TODO: Set spec constatns here.
1579 
1580  // TODO: Handle zero sized Device list.
1582  ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(),
1583  PIDevices.data(),
1584  /*options=*/nullptr,
1585  /*num_input_headers=*/0, /*input_headers=*/nullptr,
1586  /*header_include_names=*/nullptr,
1587  /*pfn_notify=*/nullptr, /*user_data*/ nullptr);
1588  if (Error != PI_SUCCESS)
1589  throw sycl::exception(
1591  getProgramBuildLog(ObjectImpl->get_program_ref(),
1592  getSyclObjImpl(ObjectImpl->get_context())));
1593 
1594  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
1595 }
1596 
1597 std::vector<device_image_plain>
1598 ProgramManager::link(const std::vector<device_image_plain> &DeviceImages,
1599  const std::vector<device> &Devs,
1600  const property_list &PropList) {
1601  (void)PropList;
1602 
1603  std::vector<pi_program> PIPrograms;
1604  PIPrograms.reserve(DeviceImages.size());
1605  for (const device_image_plain &DeviceImage : DeviceImages)
1606  PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref());
1607 
1608  std::vector<pi_device> PIDevices;
1609  PIDevices.reserve(Devs.size());
1610  for (const device &Dev : Devs)
1611  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
1612 
1613  const context &Context = getSyclObjImpl(DeviceImages[0])->get_context();
1614  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1615 
1616  const detail::plugin &Plugin = ContextImpl->getPlugin();
1617 
1618  RT::PiProgram LinkedProg = nullptr;
1620  ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
1621  /*options=*/nullptr, PIPrograms.size(), PIPrograms.data(),
1622  /*pfn_notify=*/nullptr,
1623  /*user_data=*/nullptr, &LinkedProg);
1624 
1625  if (Error != PI_SUCCESS) {
1626  if (LinkedProg) {
1627  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
1628  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
1629  }
1630  Plugin.reportPiError(Error, "link()");
1631  }
1632 
1633  std::vector<kernel_id> KernelIDs;
1634  for (const device_image_plain &DeviceImage : DeviceImages) {
1635  // Duplicates are not expected here, otherwise piProgramLink should fail
1636  KernelIDs.insert(KernelIDs.end(),
1637  getSyclObjImpl(DeviceImage)->get_kernel_ids().begin(),
1638  getSyclObjImpl(DeviceImage)->get_kernel_ids().end());
1639  }
1640  // device_image_impl expects kernel ids to be sorted for fast search
1641  std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{});
1642 
1643  DeviceImageImplPtr ExecutableImpl =
1644  std::make_shared<detail::device_image_impl>(
1645  /*BinImage=*/nullptr, Context, Devs, bundle_state::executable,
1646  std::move(KernelIDs), LinkedProg);
1647 
1648  // TODO: Make multiple sets of device images organized by devices they are
1649  // compiled for.
1650  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
1651 }
1652 
1653 // The function duplicates most of the code from existing getBuiltPIProgram.
1654 // The differences are:
1655 // Different API - uses different objects to extract required info
1656 // Supports caching of a program built for multiple devices
1658  const std::vector<device> &Devs,
1659  const property_list &PropList) {
1660  (void)PropList;
1661 
1662  const std::shared_ptr<device_image_impl> &InputImpl =
1663  getSyclObjImpl(DeviceImage);
1664 
1665  const context Context = InputImpl->get_context();
1666 
1667  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1668 
1669  using PiProgramT = KernelProgramCache::PiProgramT;
1670  using ProgramCacheT = KernelProgramCache::ProgramCacheT;
1671 
1672  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
1673 
1674  auto AcquireF = [](KernelProgramCache &Cache) {
1675  return Cache.acquireCachedPrograms();
1676  };
1677  auto GetF = [](const Locked<ProgramCacheT> &LockedCache) -> ProgramCacheT & {
1678  return LockedCache.get();
1679  };
1680 
1681  std::string CompileOpts;
1682  std::string LinkOpts;
1683  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
1684 
1685  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
1686  const RTDeviceBinaryImage &Img = *ImgPtr;
1687 
1688  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
1689 
1690  // TODO: Unify this code with getBuiltPIProgram
1691  auto BuildF = [this, &Context, Img, &Devs, &CompileOpts, &LinkOpts,
1692  &InputImpl, SpecConsts] {
1693  applyOptionsFromImage(CompileOpts, LinkOpts, Img);
1694  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1695  const detail::plugin &Plugin = ContextImpl->getPlugin();
1696 
1697  // TODO: Add support for creating non-SPIRV programs from multiple devices.
1698  if (InputImpl->get_bin_image_ref()->getFormat() !=
1700  Devs.size() > 1)
1701  sycl::runtime_error(
1702  "Creating a program from AOT binary for multiple device is not "
1703  "supported",
1705 
1706  // Device is not used when creating program from SPIRV, so passing only one
1707  // device is OK.
1708  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
1709  Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
1710 
1711  if (!DeviceCodeWasInCache) {
1712  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1713  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
1714 
1715  {
1716  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
1717  const std::map<std::string,
1718  std::vector<device_image_impl::SpecConstDescT>>
1719  &SpecConstData = InputImpl->get_spec_const_data_ref();
1720 
1721  for (const auto &DescPair : SpecConstData) {
1722  for (const device_image_impl::SpecConstDescT &SpecIDDesc :
1723  DescPair.second) {
1724  if (SpecIDDesc.IsSet) {
1726  NativePrg, SpecIDDesc.ID, SpecIDDesc.Size,
1727  SpecConsts.data() + SpecIDDesc.BlobOffset);
1728  }
1729  }
1730  }
1731  }
1732  }
1733 
1734  ProgramPtr ProgramManaged(
1735  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
1736 
1737  // Link a fallback implementation of device libraries if they are not
1738  // supported by a device compiler.
1739  // Pre-compiled programs are supposed to be already linked.
1740  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
1741  // no fallback device library will be linked.
1742  uint32_t DeviceLibReqMask = 0;
1743  if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
1745  DeviceLibReqMask = getDeviceLibReqMask(Img);
1746 
1747  ProgramPtr BuiltProgram =
1748  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
1749  getRawSyclObjImpl(Devs[0])->getHandleRef(),
1750  ContextImpl->getCachedLibPrograms(), DeviceLibReqMask);
1751 
1752  {
1753  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1754  NativePrograms[BuiltProgram.get()] = &Img;
1755  }
1756 
1757  // Save program to persistent cache if it is not there
1758  if (!DeviceCodeWasInCache)
1759  PersistentDeviceCodeCache::putItemToDisc(
1760  Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
1761 
1762  return BuiltProgram.release();
1763  };
1764 
1765  const RT::PiDevice PiDevice = getRawSyclObjImpl(Devs[0])->getHandleRef();
1766  // TODO: Throw SYCL2020 style exception
1767  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
1768  Cache,
1769  std::make_pair(std::make_pair(std::move(SpecConsts), (size_t)ImgPtr),
1770  std::make_pair(PiDevice, CompileOpts + LinkOpts)),
1771  AcquireF, GetF, BuildF);
1772 
1773  RT::PiProgram ResProgram = BuildResult->Ptr.load();
1774 
1775  // Cache supports key with once device only, but here we have multiple
1776  // devices a program is built for, so add the program to the cache for all
1777  // other devices.
1778  auto CacheOtherDevices = [ResProgram]() { return ResProgram; };
1779 
1780  // The program for device "0" is already added to the cache during the first
1781  // call to getOrBuild, so starting with "1"
1782  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
1783  const RT::PiDevice PiDeviceAdd =
1784  getRawSyclObjImpl(Devs[Idx])->getHandleRef();
1785 
1786  getOrBuild<PiProgramT, compile_program_error>(
1787  Cache,
1788  std::make_pair(std::make_pair(std::move(SpecConsts), (size_t)ImgPtr),
1789  std::make_pair(PiDeviceAdd, CompileOpts + LinkOpts)),
1790  AcquireF, GetF, CacheOtherDevices);
1791  }
1792 
1793  // devive_image_impl shares ownership of PIProgram with, at least, program
1794  // cache. The ref counter will be descremented in the destructor of
1795  // device_image_impl
1796  const detail::plugin &Plugin = ContextImpl->getPlugin();
1797  Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
1798 
1799  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
1800  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
1801  InputImpl->get_kernel_ids_ref(), ResProgram,
1802  InputImpl->get_spec_const_data_ref(),
1803  InputImpl->get_spec_const_blob_ref());
1804 
1805  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
1806 }
1807 
1808 std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
1809  const context &Context, const std::string &KernelName,
1810  const property_list &PropList, RT::PiProgram Program) {
1811 
1812  (void)PropList;
1813 
1814  const ContextImplPtr Ctx = getSyclObjImpl(Context);
1815 
1816  using PiKernelT = KernelProgramCache::PiKernelT;
1817  using KernelCacheT = KernelProgramCache::KernelCacheT;
1818  using KernelByNameT = KernelProgramCache::KernelByNameT;
1819 
1820  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
1821 
1822  auto AcquireF = [](KernelProgramCache &Cache) {
1823  return Cache.acquireKernelsPerProgramCache();
1824  };
1825  auto GetF =
1826  [&Program](const Locked<KernelCacheT> &LockedCache) -> KernelByNameT & {
1827  return LockedCache.get()[Program];
1828  };
1829  auto BuildF = [&Program, &KernelName, &Ctx] {
1830  PiKernelT *Result = nullptr;
1831 
1832  const detail::plugin &Plugin = Ctx->getPlugin();
1833  Plugin.call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
1834  &Result);
1835 
1837  sizeof(pi_bool), &PI_TRUE);
1838 
1839  return Result;
1840  };
1841 
1842  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
1843  Cache, KernelName, AcquireF, GetF, BuildF);
1844  return std::make_pair(BuildResult->Ptr.load(),
1845  &(BuildResult->MBuildResultMutex));
1846 }
1847 
1848 } // namespace detail
1849 } // namespace sycl
1850 } // __SYCL_INLINE_NAMESPACE(cl)
1851 
1854 }
1855 
1856 // Executed as a part of current module's (.exe, .dll) static initialization
1858  (void)desc;
1859  // TODO implement the function
1860 }
cl::sycl::detail::getRawSyclObjImpl
detail::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
Definition: common.hpp:191
cl::sycl::detail::KernelProgramCache::BuildError::Code
pi_int32 Code
Definition: kernel_program_cache.hpp:37
cl::sycl::detail::pi::DeviceBinaryImage::getKernelParamOptInfo
const PropertyRange & getKernelParamOptInfo() const
Definition: pi.hpp:361
PI_DEVICE_BINARY_TYPE_NONE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
Definition: pi.h:669
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_esimd_emulator.cpp:1035
cl::sycl::backend
backend
Definition: backend_types.hpp:21
cl::sycl::detail::KernelProgramCache::BuildResult
Denotes pointer to some entity with its general state and build error.
Definition: kernel_program_cache.hpp:48
cl::sycl::detail::Locked
Represents a reference to value with appropriate lock acquired.
Definition: locked.hpp:24
cl::sycl::detail::DbgProgMgr
static constexpr int DbgProgMgr
Definition: program_manager.cpp:46
_pi_offload_entry_struct
Definition: pi.h:626
PI_SUCCESS
@ PI_SUCCESS
Definition: pi.h:82
cl::sycl::detail::pi::DeviceBinaryImage::getLinkOptions
const char * getLinkOptions() const
Definition: pi.hpp:313
cl::sycl::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:56
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:31
cl::sycl::detail::isDeviceLibRequired
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:857
cl::sycl::exception::get_cl_code
cl_int get_cl_code() const
Definition: exception.cpp:114
pi_bool
pi_uint32 pi_bool
Definition: pi.h:70
context_impl.hpp
cl::sycl::detail::KernelProgramCache::saveKernel
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
Definition: kernel_program_cache.hpp:126
cl::sycl::detail::make_tuple
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:36
cl::sycl::detail::enableITTAnnotationsIfNeeded
static void enableITTAnnotationsIfNeeded(const RT::PiProgram &Prog, const plugin &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
Definition: program_manager.cpp:54
cl::sycl::detail::RTDeviceBinaryImage::supportsSpecConstants
bool supportsSpecConstants() const
Definition: device_binary_image.hpp:30
pi_device_binaries_struct::DeviceBinaries
pi_device_binary DeviceBinaries
Device binaries data.
Definition: pi.h:848
PI_INVALID_OPERATION
@ PI_INVALID_OPERATION
Definition: pi.h:84
PI_DEVICE_BINARY_TYPE_SPIRV
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
Definition: pi.h:674
type_traits.hpp
PI_DEVICE_INFO_NAME
@ PI_DEVICE_INFO_NAME
Definition: pi.h:262
cl::sycl::detail::SpvFileKSId
constexpr KernelSetId SpvFileKSId
Definition: common.hpp:327
cl::sycl::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:20
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:668
pi_device_binary_struct::DeviceTargetSpec
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:763
config.hpp
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:742
cl::sycl::detail::pi::DeviceBinaryImage::getFormat
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
Definition: pi.hpp:319
cl::sycl::detail::KernelProgramCache
Definition: kernel_program_cache.hpp:31
PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: pi.h:676
cl::sycl::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:623
stl.hpp
cl::sycl::detail::RTDeviceBinaryImage::print
void print() const override
Definition: device_binary_image.hpp:36
cl::sycl::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:107
device.hpp
cl::sycl::detail::waitUntilBuilt
RetT * waitUntilBuilt(KernelProgramCache &Cache, KernelProgramCache::BuildResult< RetT > *BuildResult)
Definition: program_manager.cpp:121
cl::sycl::detail::KernelProgramCache::KernelByNameT
std::map< std::string, KernelWithBuildStateT > KernelByNameT
Definition: kernel_program_cache.hpp:82
cl::sycl::detail::pi::getBinaryImageFormat
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: pi.cpp:664
cl::sycl::detail::getOrBuild
KernelProgramCache::BuildResult< RetT > * getOrBuild(KernelProgramCache &KPCache, KeyT &&CacheKey, AcquireFT &&Acquire, GetCacheFT &&GetCache, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
Definition: program_manager.cpp:161
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1022
cl::sycl::detail::KernelProgramCache::BuildError
Denotes build error data.
Definition: kernel_program_cache.hpp:35
cl::sycl::detail::device_image_plain
Definition: kernel_bundle.hpp:70
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::end
ConstIterator end() const
Definition: pi.hpp:278
cl::sycl::detail::pi::DeviceBinaryImage::getCompileOptions
const char * getCompileOptions() const
Definition: pi.hpp:308
cl::sycl::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:102
cl::sycl::detail::KernelProgramCache::notifyAllBuild
void notifyAllBuild(BuildResult< T > &BR) const
Definition: kernel_program_cache.hpp:111
pi_device_binary_struct::BinaryStart
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: pi.h:775
cl::sycl::detail::SYCLConfig
Definition: config.hpp:104
cl::sycl::detail::BuildState
BuildState
Definition: program_manager.cpp:48
_pi_result
_pi_result
Definition: pi.h:81
cl::sycl::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:59
context.hpp
cl::sycl::platform::get_info
info::param_traits< info::platform, param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.cpp:54
cl::sycl::errc::feature_not_supported
@ feature_not_supported
cl::sycl::detail::getFormatStr
static const char * getFormatStr(RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:289
cl::sycl::detail::program_impl
Definition: program_impl.hpp:37
cl::sycl::detail::loadDeviceLibFallback
static RT::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const RT::PiDevice &Device, std::map< std::pair< DeviceLibExt, RT::PiDevice >, RT::PiProgram > &CachedLibPrograms)
Definition: program_manager.cpp:717
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:645
os_util.hpp
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::detail::LastKSId
constexpr KernelSetId LastKSId
Definition: common.hpp:328
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:840
cl::sycl::detail::KernelProgramCache::PiKernelT
std::remove_pointer< RT::PiKernel >::type PiKernelT
Definition: kernel_program_cache.hpp:78
pi_device_binary_struct::BinaryEnd
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: pi.h:777
cl::sycl::detail::pi::PiDeviceBinaryType
::pi_device_binary_type PiDeviceBinaryType
Definition: pi.hpp:105
cl::sycl::detail::plugin::getPiPlugin
const RT::PiPlugin & getPiPlugin() const
Definition: plugin.hpp:104
cl::sycl::detail::isDeviceBinaryTypeSupported
static bool isDeviceBinaryTypeSupported(const context &C, RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:245
cl::sycl::detail::RTDeviceBinaryImage::getRawData
const pi_device_binary_struct & getRawData() const
Definition: device_binary_image.hpp:34
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1253
device_impl.hpp
cl::sycl::detail::BS_Failed
@ BS_Failed
Definition: program_manager.cpp:48
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:25
PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:325
util.hpp
cl::sycl::detail::applyOptionsFromEnvironment
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
Definition: program_manager.cpp:399
cl::sycl::detail::KernelProgramCache::KernelCacheT
std::map< RT::PiProgram, KernelByNameT > KernelCacheT
Definition: kernel_program_cache.hpp:83
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:700
cl::sycl::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:324
PI_KERNEL_INFO_PROGRAM
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:346
cl::sycl::detail::KernelProgramCache::BuildResult::Ptr
std::atomic< T * > Ptr
Definition: kernel_program_cache.hpp:49
cl::sycl::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
cl::sycl::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:141
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:578
cl::sycl::context::get_platform
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:123
cl::sycl::detail::applyOptionsFromImage
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:366
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:994
compile_program_error
cl::sycl::detail::pi::DeviceBinaryImage::getExportedSymbols
const PropertyRange getExportedSymbols() const
Definition: pi.hpp:372
PI_ERROR_UNKNOWN
@ PI_ERROR_UNKNOWN
Definition: pi.h:119
cl::sycl::detail::pi::asUint32
static pi_uint32 asUint32(const void *Addr)
Definition: pi.cpp:604
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::begin
ConstIterator begin() const
Definition: pi.hpp:277
char
cl::sycl::detail::program_impl::get
cl_program get() const
Returns a valid cl_program instance.
Definition: program_impl.cpp:215
cl::sycl::detail::pi::DeviceBinaryProperty::asUint32
pi_uint32 asUint32() const
Definition: pi.cpp:610
cl::sycl::detail::pi::DeviceBinaryImage::getProgramMetadata
const PropertyRange & getProgramMetadata() const
Definition: pi.hpp:371
pi_uint32
uint32_t pi_uint32
Definition: pi.h:68
cl::sycl::detail::pi::DeviceBinaryImage::getAssertUsed
const PropertyRange getAssertUsed() const
Definition: pi.hpp:364
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:743
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:1852
program_impl.hpp
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:34
cl::sycl::detail::program_impl::flush_spec_constants
void flush_spec_constants(const RTDeviceBinaryImage &Img, RT::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
Definition: program_impl.cpp:538
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1017
cl::sycl::detail::createSpirvProgram
static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
Definition: program_manager.cpp:96
cl::sycl::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:590
cl::sycl::detail::getDeviceLibExtensionStr
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
Definition: program_manager.cpp:698
pi_device_binaries_struct::NumDeviceBinaries
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: pi.h:846
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_esimd_emulator.cpp:976
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_esimd_emulator.cpp:1495
PI_DEVICE_BINARY_TYPE_NATIVE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
Definition: pi.h:671
cl::sycl::detail::getDeviceLibPrograms
static std::vector< RT::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, std::map< std::pair< DeviceLibExt, RT::PiDevice >, RT::PiProgram > &CachedLibPrograms, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:864
cl::sycl::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:38
cl::sycl::detail::createKernelArgMask
static ProgramManager::KernelArgMask createKernelArgMask(const pi::ByteArray &Bytes)
Definition: program_manager.cpp:998
cl::sycl::detail::pi::DeviceBinaryImage::getDeviceLibReqMask
const PropertyRange & getDeviceLibReqMask() const
Definition: pi.hpp:360
cl::sycl::detail::device_image_impl::SpecConstDescT::BlobOffset
unsigned int BlobOffset
Definition: device_image_impl.hpp:45
cl::sycl::detail::BS_Done
@ BS_Done
Definition: program_manager.cpp:48
cl::sycl::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:182
cl::sycl::detail::device_image_impl::SpecConstDescT::Size
unsigned int Size
Definition: device_image_impl.hpp:44
cl::sycl::detail::KernelProgramCache::BuildResult::MBuildResultMutex
std::mutex MBuildResultMutex
A mutex to be employed along with MBuildCV.
Definition: kernel_program_cache.hpp:65
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_esimd_emulator.cpp:485
cl::sycl::detail::pi::DeviceBinaryProperty
Definition: pi.hpp:227
cl::sycl::detail::ProgramManager::addImages
void addImages(pi_device_binaries DeviceImages)
Definition: program_manager.cpp:1031
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::detail::program_impl::get_build_options
std::string get_build_options() const
Returns the compile, link, or build options, from whichever of those operations was performed most re...
Definition: program_impl.hpp:310
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange
Definition: pi.hpp:249
global_handler.hpp
_pi_device_binary_property_struct
Definition: pi.h:648
cl::sycl::detail::ProgramManager::getInstance
static ProgramManager & getInstance()
Definition: program_manager.cpp:63
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:523
PI_INVALID_KERNEL_NAME
@ PI_INVALID_KERNEL_NAME
Definition: pi.h:83
cl::sycl::detail::pi::DeviceBinaryImage::getProperty
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
Definition: pi.cpp:649
cl::sycl::detail::program_impl::stableSerializeSpecConstRegistry
void stableSerializeSpecConstRegistry(SerializedObj &Dst) const
Definition: program_impl.hpp:336
cl::sycl::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:89
cl::sycl::detail::pi::DeviceBinaryImage::getSize
size_t getSize() const
Definition: pi.hpp:303
program_manager.hpp
cl::sycl::get_kernel_ids
std::vector< kernel_id > get_kernel_ids()
Definition: kernel_bundle.cpp:273
persistent_device_code_cache.hpp
cl::sycl::context::get_devices
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:127
cl::sycl::detail::ProgramManager
Definition: program_manager.hpp:70
cl::sycl::detail::device_image_impl::SpecConstDescT::ID
unsigned int ID
Definition: device_image_impl.hpp:42
cl::sycl::detail::UseSpvEnv
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:1857
cl::sycl::detail::compatibleWithDevice
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
Definition: program_manager.cpp:1302
cl::sycl::detail::KernelProgramCache::BuildError::Msg
std::string Msg
Definition: kernel_program_cache.hpp:36
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(cl::sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
cl::sycl::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
device_image_impl.hpp
cl::sycl::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:73
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:87
cl::sycl::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:447
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1052
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:182
backend_types.hpp
exception.hpp
cl::sycl::detail::KernelProgramCache::ProgramCacheT
std::map< ProgramCacheKeyT, ProgramWithBuildStateT > ProgramCacheT
Definition: kernel_program_cache.hpp:75
cl::sycl::detail::getBinImageState
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
Definition: program_manager.cpp:1286
cl::sycl::detail::KernelProgramCache::waitUntilBuilt
void waitUntilBuilt(BuildResult< T > &BR, Predicate Pred) const
Definition: kernel_program_cache.hpp:105
cl::sycl::detail::getDeviceLibFilename
static const char * getDeviceLibFilename(DeviceLibExt Extension)
Definition: program_manager.cpp:679
cl::sycl::detail::program_impl::hasSetSpecConstants
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
Definition: program_impl.hpp:341
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
cl::sycl::detail::KernelProgramCache::BuildResult::Error
BuildError Error
Definition: kernel_program_cache.hpp:51
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_esimd_emulator.cpp:972
cl::sycl::detail::KernelProgramCache::acquireKernelsPerProgramCache
Locked< KernelCacheT > acquireKernelsPerProgramCache()
Definition: kernel_program_cache.hpp:100
cl::sycl::detail::createBinaryProgram
static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
Definition: program_manager.cpp:68
cl::sycl::detail::pi::ByteArray
Definition: pi.hpp:211
cl::sycl::cl_uint
std::uint32_t cl_uint
Definition: aliases.hpp:83
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:54
cl::sycl::detail::BS_InProgress
@ BS_InProgress
Definition: program_manager.cpp:48
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_esimd_emulator.cpp:1505
common.hpp
cl::sycl::ext::oneapi::experimental::spec_const_error
Definition: spec_constant.hpp:30
cl::sycl::exception
Definition: exception.hpp:63
cl::sycl::detail::KernelProgramCache::acquireCachedPrograms
Locked< ProgramCacheT > acquireCachedPrograms()
Definition: kernel_program_cache.hpp:96
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:699
_pi_plugin::PiFunctionTable
struct _pi_plugin::FunctionPointers PiFunctionTable
pi_device_binary_struct::EntriesEnd
_pi_offload_entry EntriesEnd
Definition: pi.h:780
cl::sycl::detail::loadDeviceLib
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, RT::PiProgram &Prog)
Definition: program_manager.cpp:658
cl::sycl::detail::pi::DeviceBinaryImage::get
pi_device_binary get() const
Definition: pi.hpp:383
cl::sycl::detail::device_image_impl::SpecConstDescT::IsSet
bool IsSet
Definition: device_image_impl.hpp:46
PI_PROGRAM_INFO_DEVICES
@ PI_PROGRAM_INFO_DEVICES
Definition: pi.h:315
spec_constant_impl.hpp
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::isAvailable
bool isAvailable() const
Definition: pi.hpp:280
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_esimd_emulator.cpp:1500
cl::sycl::detail::program_impl::getHandleRef
RT::PiProgram & getHandleRef()
Definition: program_impl.hpp:133
cl::sycl::exception::what
const char * what() const noexcept final
Definition: exception.cpp:103
_pi_offload_entry_struct::name
char * name
Definition: pi.h:628
cl::sycl::detail::plugin::reportPiError
void reportPiError(RT::PiResult pi_result, const char *context) const
Definition: plugin.hpp:120
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:486
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
cl::sycl::detail::KernelProgramCache::PiProgramT
std::remove_pointer< RT::PiProgram >::type PiProgramT
Definition: kernel_program_cache.hpp:70
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:698
cl::sycl::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:49
cl::sycl::detail::LessByNameComp
Definition: kernel_id_impl.hpp:16
cl::sycl::detail::device_image_impl::SpecConstDescT
Definition: device_image_impl.hpp:41
pi_int32
int32_t pi_int32
Definition: pi.h:67
spec_constant.hpp
cl::sycl::detail::KernelProgramCache::tryToGetKernelFast
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
Definition: kernel_program_cache.hpp:116
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:71
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12
cl::sycl::detail::DeviceImageImplPtr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
Definition: kernel_bundle.hpp:66
pi_device_binary_struct::EntriesBegin
_pi_offload_entry EntriesBegin
the offload entry table
Definition: pi.h:779
cl::sycl::detail::KernelProgramCache::BuildResult::State
std::atomic< int > State
Definition: kernel_program_cache.hpp:50