DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/config.hpp>
10 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
13 #include <detail/event_impl.hpp>
16 #include <detail/program_impl.hpp>
18 #include <detail/queue_impl.hpp>
20 #include <sycl/aspects.hpp>
21 #include <sycl/backend_types.hpp>
22 #include <sycl/context.hpp>
23 #include <sycl/detail/common.hpp>
24 #include <sycl/detail/os_util.hpp>
26 #include <sycl/detail/util.hpp>
27 #include <sycl/device.hpp>
28 #include <sycl/exception.hpp>
30 #include <sycl/stl.hpp>
31 
32 #include <algorithm>
33 #include <cassert>
34 #include <cstdint>
35 #include <cstdlib>
36 #include <cstring>
37 #include <fstream>
38 #include <memory>
39 #include <mutex>
40 #include <sstream>
41 #include <string>
42 #include <variant>
43 
44 namespace sycl {
46 namespace detail {
47 
48 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
49 
50 static constexpr int DbgProgMgr = 0;
51 
52 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
53 
57  const plugin &Plugin) {
59  constexpr char SpecValue = 1;
61  Prog, ITTSpecConstId, sizeof(char), &SpecValue);
62  }
63 }
64 
65 ProgramManager &ProgramManager::getInstance() {
66  return GlobalHandler::instance().getProgramManager();
67 }
68 
69 static RT::PiProgram
70 createBinaryProgram(const ContextImplPtr Context, const device &Device,
71  const unsigned char *Data, size_t DataLen,
72  const std::vector<pi_device_binary_property> Metadata) {
73  const detail::plugin &Plugin = Context->getPlugin();
74 #ifndef _NDEBUG
75  pi_uint32 NumDevices = 0;
76  Plugin.call<PiApiKind::piContextGetInfo>(Context->getHandleRef(),
78  sizeof(NumDevices), &NumDevices,
79  /*param_value_size_ret=*/nullptr);
80  assert(NumDevices > 0 &&
81  "Only a single device is supported for AOT compilation");
82 #endif
83 
84  RT::PiProgram Program;
85  const RT::PiDevice PiDevice = getSyclObjImpl(Device)->getHandleRef();
86  pi_int32 BinaryStatus = CL_SUCCESS;
88  Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data,
89  Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
90 
91  if (BinaryStatus != CL_SUCCESS) {
92  throw runtime_error("Creating program with binary failed.", BinaryStatus);
93  }
94 
95  return Program;
96 }
97 
99  const unsigned char *Data,
100  size_t DataLen) {
101  RT::PiProgram Program = nullptr;
102  const detail::plugin &Plugin = Context->getPlugin();
103  Plugin.call<PiApiKind::piProgramCreate>(Context->getHandleRef(), Data,
104  DataLen, &Program);
105  return Program;
106 }
107 
108 RTDeviceBinaryImage &
109 ProgramManager::getDeviceImage(OSModuleHandle M, const std::string &KernelName,
110  const context &Context, const device &Device,
111  bool JITCompilationIsRequired) {
112  if (DbgProgMgr > 0)
113  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \""
114  << KernelName << "\", " << getRawSyclObjImpl(Context) << ", "
115  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
116  << ")\n";
117 
118  KernelSetId KSId = getKernelSetId(M, KernelName);
119  return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
120 }
121 
141 template <typename RetT, typename ExceptionT, typename GetCachedBuildFT,
142  typename BuildFT>
144 getOrBuild(KernelProgramCache &KPCache, GetCachedBuildFT &&GetCachedBuild,
145  BuildFT &&Build) {
146  using BuildState = KernelProgramCache::BuildState;
147 
148  auto [BuildResult, InsertionTookPlace] = GetCachedBuild();
149 
150  // no insertion took place, thus some other thread has already inserted smth
151  // in the cache
152  if (!InsertionTookPlace) {
153  for (;;) {
154  RetT *Result = KPCache.waitUntilBuilt<ExceptionT>(BuildResult);
155 
156  if (Result)
157  return BuildResult;
158 
159  // Previous build is failed. There was no SYCL exception though.
160  // We might try to build once more.
161  BuildState Expected = BuildState::BS_Failed;
162  BuildState Desired = BuildState::BS_InProgress;
163 
164  if (BuildResult->State.compare_exchange_strong(Expected, Desired))
165  break; // this thread is the building thread now
166  }
167  }
168 
169  // only the building thread will run this
170  try {
171  RetT *Desired = Build();
172 
173 #ifndef NDEBUG
174  RetT *Expected = nullptr;
175 
176  if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
177  // We've got a funny story here
178  assert(false && "We've build an entity that is already have been built.");
179 #else
180  BuildResult->Ptr.store(Desired);
181 #endif
182 
183  {
184  // Even if shared variable is atomic, it must be modified under the mutex
185  // in order to correctly publish the modification to the waiting thread
186  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
187  BuildResult->State.store(BuildState::BS_Done);
188  }
189 
190  KPCache.notifyAllBuild(*BuildResult);
191 
192  return BuildResult;
193  } catch (const exception &Ex) {
194  BuildResult->Error.Msg = Ex.what();
195  BuildResult->Error.Code = Ex.get_cl_code();
196 
197  {
198  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
199  BuildResult->State.store(BuildState::BS_Failed);
200  }
201 
202  KPCache.notifyAllBuild(*BuildResult);
203 
204  std::rethrow_exception(std::current_exception());
205  } catch (...) {
206  {
207  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
208  BuildResult->State.store(BuildState::BS_Failed);
209  }
210 
211  KPCache.notifyAllBuild(*BuildResult);
212 
213  std::rethrow_exception(std::current_exception());
214  }
215 }
216 
217 // TODO replace this with a new PI API function
219  RT::PiDeviceBinaryType Format) {
220  // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported.
221  if (Format != PI_DEVICE_BINARY_TYPE_SPIRV)
222  return true;
223 
224  const backend ContextBackend =
225  detail::getSyclObjImpl(C)->getPlugin().getBackend();
226 
227  // The CUDA backend cannot use SPIR-V
228  if (ContextBackend == backend::ext_oneapi_cuda)
229  return false;
230 
231  std::vector<device> Devices = C.get_devices();
232 
233  // Program type is SPIR-V, so we need a device compiler to do JIT.
234  for (const device &D : Devices) {
235  if (!D.get_info<info::device::is_compiler_available>())
236  return false;
237  }
238 
239  // OpenCL 2.1 and greater require clCreateProgramWithIL
240  if (ContextBackend == backend::opencl) {
241  std::string ver = C.get_platform().get_info<info::platform::version>();
242  if (ver.find("OpenCL 1.0") == std::string::npos &&
243  ver.find("OpenCL 1.1") == std::string::npos &&
244  ver.find("OpenCL 1.2") == std::string::npos &&
245  ver.find("OpenCL 2.0") == std::string::npos)
246  return true;
247  }
248 
249  for (const device &D : Devices) {
250  // We need cl_khr_il_program extension to be present
251  // and we can call clCreateProgramWithILKHR using the extension
252  std::vector<std::string> Extensions =
253  D.get_info<info::device::extensions>();
254  if (Extensions.end() ==
255  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
256  return false;
257  }
258 
259  return true;
260 }
261 
262 static const char *getFormatStr(RT::PiDeviceBinaryType Format) {
263  switch (Format) {
265  return "none";
267  return "native";
269  return "SPIR-V";
271  return "LLVM IR";
272  }
273  assert(false && "Unknown device image format");
274  return "unknown";
275 }
276 
277 RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img,
278  const context &Context,
279  const device &Device) {
280  if (DbgProgMgr > 0)
281  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
282  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
283  << ")\n";
284  const pi_device_binary_struct &RawImg = Img.getRawData();
285 
286  // perform minimal sanity checks on the device image and the descriptor
287  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
288  throw runtime_error("Malformed device program image descriptor",
289  PI_ERROR_INVALID_VALUE);
290  }
291  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
292  throw runtime_error("Invalid device program image: size is zero",
293  PI_ERROR_INVALID_VALUE);
294  }
295  size_t ImgSize = Img.getSize();
296 
297  // TODO if the binary image is a part of the fat binary, the clang
298  // driver should have set proper format option to the
299  // clang-offload-wrapper. The fix depends on AOT compilation
300  // implementation, so will be implemented together with it.
301  // Img->Format can't be updated as it is inside of the in-memory
302  // OS module binary.
303  RT::PiDeviceBinaryType Format = Img.getFormat();
304 
305  if (Format == PI_DEVICE_BINARY_TYPE_NONE)
306  Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
307  // RT::PiDeviceBinaryType Format = Img->Format;
308  // assert(Format != PI_DEVICE_BINARY_TYPE_NONE && "Image format not set");
309 
310  if (!isDeviceBinaryTypeSupported(Context, Format))
311  throw feature_not_supported(
312  "SPIR-V online compilation is not supported in this context",
313  PI_ERROR_INVALID_OPERATION);
314 
315  // Get program metadata from properties
316  auto ProgMetadata = Img.getProgramMetadata();
317  std::vector<pi_device_binary_property> ProgMetadataVector{
318  ProgMetadata.begin(), ProgMetadata.end()};
319 
320  // Load the image
321  const ContextImplPtr Ctx = getSyclObjImpl(Context);
323  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
324  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart,
325  ImgSize, ProgMetadataVector);
326 
327  {
328  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
329  // associate the PI program with the image it was created for
330  NativePrograms[Res] = &Img;
331  }
332 
333  Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
334 
335  if (DbgProgMgr > 1)
336  std::cerr << "created program: " << Res
337  << "; image format: " << getFormatStr(Format) << "\n";
338 
339  return Res;
340 }
341 
342 static void appendLinkOptionsFromImage(std::string &LinkOpts,
343  const RTDeviceBinaryImage &Img) {
344  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
345  // Update only if link options are not overwritten by environment variable
346  if (!LinkOptsEnv) {
347  const char *TemporaryStr = Img.getLinkOptions();
348  if (TemporaryStr != nullptr) {
349  if (!LinkOpts.empty())
350  LinkOpts += " ";
351  LinkOpts += std::string(TemporaryStr);
352  }
353  }
354 }
355 
357  const char *PropName) {
358  pi_device_binary_property Prop = Img.getProperty(PropName);
359  return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
360 }
361 
362 static void appendCompileOptionsFromImage(std::string &CompileOpts,
363  const RTDeviceBinaryImage &Img,
364  const std::vector<device> &Devs,
365  const detail::plugin &Plugin) {
366  // Build options are overridden if environment variables are present.
367  // Environment variables are not changed during program lifecycle so it
368  // is reasonable to use static here to read them only once.
369  static const char *CompileOptsEnv =
371  // Update only if compile options are not overwritten by environment
372  // variable
373  if (!CompileOptsEnv) {
374  if (!CompileOpts.empty())
375  CompileOpts += " ";
376  const char *TemporaryStr = Img.getCompileOptions();
377  if (TemporaryStr != nullptr)
378  CompileOpts += std::string(TemporaryStr);
379  }
380  bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
381  // TODO: Remove isDoubleGRF check in next ABI break
382  bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") ||
383  getUint32PropAsBool(Img, "isDoubleGRF");
384  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
385  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
386  if (isEsimdImage) {
387  if (!CompileOpts.empty())
388  CompileOpts += " ";
389  CompileOpts += "-vc-codegen";
390  // Allow warning and performance hints from vc/finalizer if the RT warning
391  // level is at least 1.
393  CompileOpts += " -disable-finalizer-msg";
394  }
395  if (isLargeGRF) {
396  if (!CompileOpts.empty())
397  CompileOpts += " ";
398  // TODO: Don't check the property or pass these flags after the next ABI
399  // break. The behavior is now controlled through the RegisterAllocMode
400  // metadata.
401  CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file";
402  }
403  if ((Plugin.getBackend() == backend::ext_oneapi_level_zero ||
404  Plugin.getBackend() == backend::opencl) &&
405  std::all_of(Devs.begin(), Devs.end(),
406  [](const device &Dev) { return Dev.is_gpu(); }) &&
407  Img.getDeviceGlobals().size() != 0) {
408  // If the image has device globals we need to add the
409  // -ze-take-global-address option to tell IGC to record addresses of these.
410  if (!CompileOpts.empty())
411  CompileOpts += " ";
412  CompileOpts += "-ze-take-global-address";
413  }
414 }
415 
416 static void applyOptionsFromImage(std::string &CompileOpts,
417  std::string &LinkOpts,
418  const RTDeviceBinaryImage &Img,
419  const std::vector<device> &Devices,
420  const detail::plugin &Plugin) {
421  appendCompileOptionsFromImage(CompileOpts, Img, Devices, Plugin);
422  appendLinkOptionsFromImage(LinkOpts, Img);
423 }
424 
425 static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
426  // Environment variables are not changed during program lifecycle so it
427  // is reasonable to use static here to read them only once.
428  static const char *CompileOptsEnv =
430  if (CompileOptsEnv) {
431  CompileOpts = CompileOptsEnv;
432  }
433 }
434 
435 static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
436  // Environment variables are not changed during program lifecycle so it
437  // is reasonable to use static here to read them only once.
438  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
439  if (LinkOptsEnv) {
440  LinkOpts = LinkOptsEnv;
441  }
442 }
443 
444 static void applyOptionsFromEnvironment(std::string &CompileOpts,
445  std::string &LinkOpts) {
446  // Build options are overridden if environment variables are present.
449 }
450 
451 std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
452  const RTDeviceBinaryImage &Img, const context &Context,
453  const device &Device, const std::string &CompileAndLinkOptions,
454  SerializedObj SpecConsts) {
455  RT::PiProgram NativePrg;
456 
457  auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
458  Device, Img, SpecConsts, CompileAndLinkOptions);
459  if (BinProg.size()) {
460  // Get program metadata from properties
461  auto ProgMetadata = Img.getProgramMetadata();
462  std::vector<pi_device_binary_property> ProgMetadataVector{
463  ProgMetadata.begin(), ProgMetadata.end()};
464 
465  // TODO: Build for multiple devices once supported by program manager
466  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
467  (const unsigned char *)BinProg[0].data(),
468  BinProg[0].size(), ProgMetadataVector);
469  } else {
470  NativePrg = createPIProgram(Img, Context, Device);
471  }
472  return {NativePrg, BinProg.size()};
473 }
474 
477 static void emitBuiltProgramInfo(const pi_program &Prog,
478  const ContextImplPtr &Context) {
480  std::string ProgramBuildLog =
481  ProgramManager::getProgramBuildLog(Prog, Context);
482  std::clog << ProgramBuildLog << std::endl;
483  }
484 }
485 
486 RT::PiProgram ProgramManager::getBuiltPIProgram(
487  OSModuleHandle M, const ContextImplPtr &ContextImpl,
488  const DeviceImplPtr &DeviceImpl, const std::string &KernelName,
489  const program_impl *Prg, bool JITCompilationIsRequired) {
490  // TODO: Make sure that KSIds will be different for the case when the same
491  // kernel built with different options is present in the fat binary.
492  KernelSetId KSId = getKernelSetId(M, KernelName);
493 
494  using PiProgramT = KernelProgramCache::PiProgramT;
495 
496  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
497 
498  std::string CompileOpts;
499  std::string LinkOpts;
500  if (Prg) {
501  CompileOpts = Prg->get_build_options();
502  }
503 
504  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
505 
506  SerializedObj SpecConsts;
507  if (Prg)
508  Prg->stableSerializeSpecConstRegistry(SpecConsts);
509 
510  // Check if we can optimize program builds for sub-devices by using a program
511  // built for the root device
512  DeviceImplPtr RootDevImpl = DeviceImpl;
513  while (!RootDevImpl->isRootDevice()) {
514  auto ParentDev = detail::getSyclObjImpl(
515  RootDevImpl->get_info<info::device::parent_device>());
516  // Sharing is allowed within a single context only
517  if (!ContextImpl->hasDevice(ParentDev))
518  break;
519  RootDevImpl = ParentDev;
520  }
521 
522  pi_bool MustBuildOnSubdevice = PI_TRUE;
523  ContextImpl->getPlugin().call<PiApiKind::piDeviceGetInfo>(
524  RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
525  sizeof(pi_bool), &MustBuildOnSubdevice, nullptr);
526 
527  DeviceImplPtr Dev =
528  (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl;
529  auto Context = createSyclObjFromImpl<context>(ContextImpl);
530  auto Device = createSyclObjFromImpl<device>(Dev);
531  const RTDeviceBinaryImage &Img =
532  getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
533 
534  // Check that device supports all aspects used by the kernel
535  const RTDeviceBinaryImage::PropertyRange &ARange =
536  Img.getDeviceRequirements();
537 
538 #define __SYCL_ASPECT(ASPECT, ID) \
539  case aspect::ASPECT: \
540  return #ASPECT;
541 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
542 // We don't need "case aspect::usm_allocator" here because it will duplicate
543 // "case aspect::usm_system_allocations", therefore leave this macro empty
544 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
545  auto getAspectNameStr = [](aspect AspectNum) -> std::string {
546  switch (AspectNum) {
547 #include <sycl/info/aspects.def>
548 #include <sycl/info/aspects_deprecated.def>
549  }
550  throw sycl::exception(errc::kernel_not_supported,
551  "Unknown aspect " +
552  std::to_string(static_cast<unsigned>(AspectNum)));
553  };
554 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
555 #undef __SYCL_ASPECT_DEPRECATED
556 #undef __SYCL_ASPECT
557 
559  using namespace std::literals;
560  if ((*It)->Name != "aspects"sv)
561  continue;
562  ByteArray Aspects = DeviceBinaryProperty(*It).asByteArray();
563  // 8 because we need to skip 64-bits of size of the byte array
564  auto *AIt = reinterpret_cast<const std::uint32_t *>(&Aspects[8]);
565  auto *AEnd =
566  reinterpret_cast<const std::uint32_t *>(&Aspects[0] + Aspects.size());
567  while (AIt != AEnd) {
568  auto Aspect = static_cast<aspect>(*AIt);
569  // Strict check for fp64 is disabled temporarily to avoid confusion.
570  if (!Dev->has(Aspect))
571  throw sycl::exception(errc::kernel_not_supported,
572  "Required aspect " + getAspectNameStr(Aspect) +
573  " is not supported on the device");
574  ++AIt;
575  }
576  }
577 
578  auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
579  &LinkOpts, SpecConsts] {
580  const detail::plugin &Plugin = ContextImpl->getPlugin();
581  applyOptionsFromImage(CompileOpts, LinkOpts, Img, {Device}, Plugin);
582 
583  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
584  Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
585 
586  if (!DeviceCodeWasInCache) {
587  if (Prg)
588  flushSpecConstants(*Prg, NativePrg, &Img);
589  if (Img.supportsSpecConstants())
590  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
591  }
592 
593  ProgramPtr ProgramManaged(
594  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
595 
596  // Link a fallback implementation of device libraries if they are not
597  // supported by a device compiler.
598  // Pre-compiled programs (after AOT compilation or read from persitent
599  // cache) are supposed to be already linked.
600  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
601  // no fallback device library will be linked.
602  uint32_t DeviceLibReqMask = 0;
603  if (!DeviceCodeWasInCache &&
606  DeviceLibReqMask = getDeviceLibReqMask(Img);
607 
608  ProgramPtr BuiltProgram =
609  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
610  getRawSyclObjImpl(Device)->getHandleRef(), DeviceLibReqMask);
611 
612  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
613 
614  {
615  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
616  NativePrograms[BuiltProgram.get()] = &Img;
617  }
618 
619  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
620 
621  // Save program to persistent cache if it is not there
622  if (!DeviceCodeWasInCache)
623  PersistentDeviceCodeCache::putItemToDisc(
624  Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
625  return BuiltProgram.release();
626  };
627 
628  uint32_t ImgId = Img.getImageID();
629  const RT::PiDevice PiDevice = Dev->getHandleRef();
630  auto CacheKey =
631  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
632  std::make_pair(PiDevice, CompileOpts + LinkOpts));
633 
634  auto GetCachedBuildF = [&Cache, &CacheKey]() {
635  return Cache.getOrInsertProgram(CacheKey);
636  };
637 
638  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
639  Cache, GetCachedBuildF, BuildF);
640  // getOrBuild is not supposed to return nullptr
641  assert(BuildResult != nullptr && "Invalid build result");
642  return BuildResult->Ptr.load();
643 }
644 
645 std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
646 ProgramManager::getOrCreateKernel(OSModuleHandle M,
647  const ContextImplPtr &ContextImpl,
648  const DeviceImplPtr &DeviceImpl,
649  const std::string &KernelName,
650  const program_impl *Prg) {
651  if (DbgProgMgr > 0) {
652  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", "
653  << ContextImpl.get() << ", " << DeviceImpl.get() << ", "
654  << KernelName << ")\n";
655  }
656 
657  using PiKernelT = KernelProgramCache::PiKernelT;
658 
659  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
660 
661  std::string CompileOpts, LinkOpts;
662  SerializedObj SpecConsts;
663  if (Prg) {
664  CompileOpts = Prg->get_build_options();
665  Prg->stableSerializeSpecConstRegistry(SpecConsts);
666  }
667  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
668  const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
669 
670  auto key = std::make_tuple(std::move(SpecConsts), M, PiDevice,
671  CompileOpts + LinkOpts, KernelName);
672  auto ret_tuple = Cache.tryToGetKernelFast(key);
673  if (std::get<0>(ret_tuple))
674  return ret_tuple;
675 
676  RT::PiProgram Program =
677  getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
678 
679  auto BuildF = [&Program, &KernelName, &ContextImpl] {
680  PiKernelT *Result = nullptr;
681 
682  const detail::plugin &Plugin = ContextImpl->getPlugin();
683  Plugin.call<errc::kernel_not_supported, PiApiKind::piKernelCreate>(
684  Program, KernelName.c_str(), &Result);
685 
686  // Some PI Plugins (like OpenCL) require this call to enable USM
687  // For others, PI will turn this into a NOP.
689  sizeof(pi_bool), &PI_TRUE);
690 
691  return Result;
692  };
693 
694  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
695  return Cache.getOrInsertKernel(Program, KernelName);
696  };
697 
698  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
699  Cache, GetCachedBuildF, BuildF);
700  // getOrBuild is not supposed to return nullptr
701  assert(BuildResult != nullptr && "Invalid build result");
702  auto ret_val = std::make_tuple(BuildResult->Ptr.load(),
703  &(BuildResult->MBuildResultMutex), Program);
704  Cache.saveKernel(key, ret_val);
705  return ret_val;
706 }
707 
709 ProgramManager::getPiProgramFromPiKernel(RT::PiKernel Kernel,
710  const ContextImplPtr Context) {
711  RT::PiProgram Program;
712  const detail::plugin &Plugin = Context->getPlugin();
714  Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr);
715  return Program;
716 }
717 
718 std::string ProgramManager::getProgramBuildLog(const RT::PiProgram &Program,
719  const ContextImplPtr Context) {
720  size_t PIDevicesSize = 0;
721  const detail::plugin &Plugin = Context->getPlugin();
723  nullptr, &PIDevicesSize);
724  std::vector<RT::PiDevice> PIDevices(PIDevicesSize / sizeof(RT::PiDevice));
726  PIDevicesSize, PIDevices.data(),
727  nullptr);
728  std::string Log = "The program was built for " +
729  std::to_string(PIDevices.size()) + " devices";
730  for (RT::PiDevice &Device : PIDevices) {
731  std::string DeviceBuildInfoString;
732  size_t DeviceBuildInfoStrSize = 0;
734  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, 0, nullptr,
735  &DeviceBuildInfoStrSize);
736  if (DeviceBuildInfoStrSize > 0) {
737  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
739  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
740  DeviceBuildInfo.data(), nullptr);
741  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
742  }
743 
744  std::string DeviceNameString;
745  size_t DeviceNameStrSize = 0;
747  nullptr, &DeviceNameStrSize);
748  if (DeviceNameStrSize > 0) {
749  std::vector<char> DeviceName(DeviceNameStrSize);
751  DeviceNameStrSize,
752  DeviceName.data(), nullptr);
753  DeviceNameString = std::string(DeviceName.data());
754  }
755  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
756  DeviceBuildInfoString;
757  }
758  return Log;
759 }
760 
761 // TODO device libraries may use scpecialization constants, manifest files, etc.
762 // To support that they need to be delivered in a different container - so that
763 // pi_device_binary_struct can be created for each of them.
764 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
765  RT::PiProgram &Prog) {
766  std::string LibSyclDir = OSUtil::getCurrentDSODir();
767  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
768  std::ifstream::in | std::ifstream::binary);
769  if (!File.good()) {
770  return false;
771  }
772 
773  File.seekg(0, std::ios::end);
774  size_t FileSize = File.tellg();
775  File.seekg(0, std::ios::beg);
776  std::vector<char> FileContent(FileSize);
777  File.read(&FileContent[0], FileSize);
778  File.close();
779 
780  Prog =
781  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
782  return Prog != nullptr;
783 }
784 
785 // For each extension, a pair of library names. The first uses native support,
786 // the second emulates functionality in software.
787 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
789  {DeviceLibExt::cl_intel_devicelib_assert,
790  {nullptr, "libsycl-fallback-cassert.spv"}},
791  {DeviceLibExt::cl_intel_devicelib_math,
792  {nullptr, "libsycl-fallback-cmath.spv"}},
793  {DeviceLibExt::cl_intel_devicelib_math_fp64,
794  {nullptr, "libsycl-fallback-cmath-fp64.spv"}},
795  {DeviceLibExt::cl_intel_devicelib_complex,
796  {nullptr, "libsycl-fallback-complex.spv"}},
797  {DeviceLibExt::cl_intel_devicelib_complex_fp64,
798  {nullptr, "libsycl-fallback-complex-fp64.spv"}},
799  {DeviceLibExt::cl_intel_devicelib_cstring,
800  {nullptr, "libsycl-fallback-cstring.spv"}},
801  {DeviceLibExt::cl_intel_devicelib_imf,
802  {nullptr, "libsycl-fallback-imf.spv"}},
803  {DeviceLibExt::cl_intel_devicelib_imf_fp64,
804  {nullptr, "libsycl-fallback-imf-fp64.spv"}},
805  {DeviceLibExt::cl_intel_devicelib_imf_bf16,
806  {nullptr, "libsycl-fallback-imf-bf16.spv"}},
807  {DeviceLibExt::cl_intel_devicelib_bfloat16,
808  {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}};
809 
810 static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) {
811  auto LibPair = DeviceLibNames.find(Extension);
812  const char *Lib = nullptr;
813  if (LibPair != DeviceLibNames.end())
814  Lib = Native ? LibPair->second.first : LibPair->second.second;
815  if (Lib == nullptr)
816  throw compile_program_error("Unhandled (new?) device library extension",
817  PI_ERROR_INVALID_OPERATION);
818  return Lib;
819 }
820 
821 // For each extension understood by the SYCL runtime, the string representation
822 // of its name. Names with devicelib in them are internal to the runtime. Others
823 // are actual OpenCL extensions.
824 static const std::map<DeviceLibExt, const char *> DeviceLibExtensionStrs = {
825  {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"},
826  {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"},
827  {DeviceLibExt::cl_intel_devicelib_math_fp64,
828  "cl_intel_devicelib_math_fp64"},
829  {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"},
830  {DeviceLibExt::cl_intel_devicelib_complex_fp64,
831  "cl_intel_devicelib_complex_fp64"},
832  {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"},
833  {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"},
834  {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"},
835  {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"},
836  {DeviceLibExt::cl_intel_devicelib_bfloat16,
837  "cl_intel_bfloat16_conversions"}};
838 
839 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
840  auto Ext = DeviceLibExtensionStrs.find(Extension);
841  if (Ext == DeviceLibExtensionStrs.end())
842  throw compile_program_error("Unhandled (new?) device library extension",
843  PI_ERROR_INVALID_OPERATION);
844  return Ext->second;
845 }
846 
848  DeviceLibExt Extension,
849  const RT::PiDevice &Device,
850  bool UseNativeLib) {
851 
852  auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib);
853 
854  auto LockedCache = Context->acquireCachedLibPrograms();
855  auto CachedLibPrograms = LockedCache.get();
856  auto CacheResult = CachedLibPrograms.emplace(
857  std::make_pair(std::make_pair(Extension, Device), nullptr));
858  bool Cached = !CacheResult.second;
859  auto LibProgIt = CacheResult.first;
860  RT::PiProgram &LibProg = LibProgIt->second;
861 
862  if (Cached)
863  return LibProg;
864 
865  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
866  CachedLibPrograms.erase(LibProgIt);
867  throw compile_program_error(std::string("Failed to load ") + LibFileName,
868  PI_ERROR_INVALID_VALUE);
869  }
870 
871  const detail::plugin &Plugin = Context->getPlugin();
872  // TODO no spec constants are used in the std libraries, support in the future
874  LibProg,
875  /*num devices = */ 1, &Device,
876  // Do not use compile options for library programs: it is not clear
877  // if user options (image options) are supposed to be applied to
878  // library program as well, and what actually happens to a SPIR-V
879  // program if we apply them.
880  "", 0, nullptr, nullptr, nullptr, nullptr);
881  if (Error != PI_SUCCESS) {
882  CachedLibPrograms.erase(LibProgIt);
883  throw compile_program_error(
884  ProgramManager::getProgramBuildLog(LibProg, Context), Error);
885  }
886 
887  return LibProg;
888 }
889 
890 ProgramManager::ProgramManager() {
891  const char *SpvFile = std::getenv(UseSpvEnv);
892  // If a SPIR-V file is specified with an environment variable,
893  // register the corresponding image
894  if (SpvFile) {
895  m_UseSpvFile = true;
896  // The env var requests that the program is loaded from a SPIR-V file on
897  // disk
898  std::ifstream File(SpvFile, std::ios::binary);
899 
900  if (!File.is_open())
901  throw runtime_error(std::string("Can't open file specified via ") +
902  UseSpvEnv + ": " + SpvFile,
903  PI_ERROR_INVALID_VALUE);
904  File.seekg(0, std::ios::end);
905  size_t Size = File.tellg();
906  std::unique_ptr<char[]> Data(new char[Size]);
907  File.seekg(0);
908  File.read(Data.get(), Size);
909  File.close();
910  if (!File.good())
911  throw runtime_error(std::string("read from ") + SpvFile +
912  std::string(" failed"),
913  PI_ERROR_INVALID_VALUE);
914  auto ImgPtr = make_unique_ptr<DynRTDeviceBinaryImage>(
915  std::move(Data), Size, OSUtil::DummyModuleHandle);
916 
917  if (DbgProgMgr > 0) {
918  std::cerr << "loaded device image binary from " << SpvFile << "\n";
919  std::cerr << "format: " << getFormatStr(ImgPtr->getFormat()) << "\n";
920  }
921  // No need for a mutex here since all access to these private fields is
922  // blocked until the construction of the ProgramManager singleton is
923  // finished.
924  m_DeviceImages[SpvFileKSId].reset(
925  new std::vector<RTDeviceBinaryImageUPtr>());
926  m_DeviceImages[SpvFileKSId]->push_back(std::move(ImgPtr));
927  }
928 }
929 
931 ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId,
932  const context &Context, const device &Device,
933  bool JITCompilationIsRequired) {
934  if (DbgProgMgr > 0) {
935  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" << KSId
936  << "\", " << getRawSyclObjImpl(Context) << ", "
937  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
938  << ")\n";
939 
940  std::cerr << "available device images:\n";
941  debugPrintBinaryImages();
942  }
943  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
944  auto It = m_DeviceImages.find(KSId);
945  assert(It != m_DeviceImages.end() &&
946  "No device image found for the given kernel set id");
947  std::vector<RTDeviceBinaryImageUPtr> &Imgs = *It->second;
948  const ContextImplPtr Ctx = getSyclObjImpl(Context);
949  pi_uint32 ImgInd = 0;
950  RTDeviceBinaryImage *Img = nullptr;
951 
952  // TODO: There may be cases with sycl::program class usage in source code
953  // that will result in a multi-device context. This case needs to be handled
954  // here or at the program_impl class level
955 
956  // Ask the native runtime under the given context to choose the device image
957  // it prefers.
958  std::vector<pi_device_binary> RawImgs(Imgs.size());
959  for (unsigned I = 0; I < Imgs.size(); I++)
960  RawImgs[I] = const_cast<pi_device_binary>(&Imgs[I]->getRawData());
961 
962  Ctx->getPlugin().call<PiApiKind::piextDeviceSelectBinary>(
963  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
964  (pi_uint32)RawImgs.size(), &ImgInd);
965 
966  if (JITCompilationIsRequired) {
967  // If the image is already compiled with AOT, throw an exception.
968  const pi_device_binary_struct &RawImg = Imgs[ImgInd]->getRawData();
969  if ((strcmp(RawImg.DeviceTargetSpec,
971  (strcmp(RawImg.DeviceTargetSpec,
973  (strcmp(RawImg.DeviceTargetSpec,
975  throw feature_not_supported("Recompiling AOT image is not supported",
976  PI_ERROR_INVALID_OPERATION);
977  }
978  }
979 
980  Img = Imgs[ImgInd].get();
981 
982  if (DbgProgMgr > 0) {
983  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
984  Img->print();
985  }
986  return *Img;
987 }
988 
989 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
990  uint32_t Mask =
991  0x1 << (static_cast<uint32_t>(Ext) -
992  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
993  return ((DeviceLibReqMask & Mask) == Mask);
994 }
995 
996 static std::vector<RT::PiProgram>
997 getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device,
998  uint32_t DeviceLibReqMask) {
999  std::vector<RT::PiProgram> Programs;
1000 
1001  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1002  {DeviceLibExt::cl_intel_devicelib_assert,
1003  /* is fallback loaded? */ false},
1004  {DeviceLibExt::cl_intel_devicelib_math, false},
1005  {DeviceLibExt::cl_intel_devicelib_math_fp64, false},
1006  {DeviceLibExt::cl_intel_devicelib_complex, false},
1007  {DeviceLibExt::cl_intel_devicelib_complex_fp64, false},
1008  {DeviceLibExt::cl_intel_devicelib_cstring, false},
1009  {DeviceLibExt::cl_intel_devicelib_imf, false},
1010  {DeviceLibExt::cl_intel_devicelib_imf_fp64, false},
1011  {DeviceLibExt::cl_intel_devicelib_imf_bf16, false},
1012  {DeviceLibExt::cl_intel_devicelib_bfloat16, false}};
1013 
1014  // Disable all devicelib extensions requiring fp64 support if at least
1015  // one underlying device doesn't support cl_khr_fp64.
1016  std::string DevExtList = get_device_info_string(
1018  Context->getPlugin());
1019  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
1020 
1021  // Load a fallback library for an extension if the device does not
1022  // support it.
1023  for (auto &Pair : RequiredDeviceLibExt) {
1024  DeviceLibExt Ext = Pair.first;
1025  bool &FallbackIsLoaded = Pair.second;
1026 
1027  if (FallbackIsLoaded) {
1028  continue;
1029  }
1030 
1031  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
1032  continue;
1033  }
1034  if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 ||
1035  Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) &&
1036  !fp64Support) {
1037  continue;
1038  }
1039 
1040  auto ExtName = getDeviceLibExtensionStr(Ext);
1041 
1042  bool InhibitNativeImpl = false;
1043  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1044  InhibitNativeImpl = strstr(Env, ExtName) != nullptr;
1045  }
1046 
1047  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1048  if (!DeviceSupports || InhibitNativeImpl) {
1049  Programs.push_back(
1050  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/false));
1051  FallbackIsLoaded = true;
1052  } else {
1053  // bfloat16 needs native library if device supports it
1054  if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) {
1055  Programs.push_back(
1056  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/true));
1057  FallbackIsLoaded = true;
1058  }
1059  }
1060  }
1061  return Programs;
1062 }
1063 
1064 ProgramManager::ProgramPtr
1065 ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
1066  const std::string &CompileOptions,
1067  const std::string &LinkOptions,
1068  const RT::PiDevice &Device, uint32_t DeviceLibReqMask) {
1069 
1070  if (DbgProgMgr > 0) {
1071  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
1072  << CompileOptions << ", " << LinkOptions << ", ... " << Device
1073  << ")\n";
1074  }
1075 
1076  // TODO: old sycl compiler always marks cassert fallback device library as
1077  // "required", this will lead to compatibilty issue when we enable online
1078  // link in SYCL runtime. If users compile their code with old compiler and run
1079  // their executable with latest SYCL runtime, cassert fallback spv file will
1080  // always be loaded which is not expected, cassert device library development
1081  // is still in progress, the unexpected loading may lead to runtime problem.
1082  // So, we clear bit 0 in device library require mask to avoid loading cassert
1083  // fallback device library and will revert this when cassert development is
1084  // done.
1085  DeviceLibReqMask &= 0xFFFFFFFE;
1086  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1087 
1088  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
1089  // We do not link with other device libraries, because it may fail
1090  // due to unrecognized SPIR-V format of those libraries.
1091  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
1092  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
1093  LinkDeviceLibs = false;
1094 
1095  std::vector<RT::PiProgram> LinkPrograms;
1096  if (LinkDeviceLibs) {
1097  LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask);
1098  }
1099 
1100  static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK");
1101  static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1');
1102 
1103  const detail::plugin &Plugin = Context->getPlugin();
1104  if (LinkPrograms.empty() && !ForceLink) {
1105  const std::string &Options = LinkOptions.empty()
1106  ? CompileOptions
1107  : (CompileOptions + " " + LinkOptions);
1108  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramBuild>(
1109  Program.get(), /*num devices =*/1, &Device, Options.c_str(), nullptr,
1110  nullptr);
1111  if (Error != PI_SUCCESS)
1112  throw compile_program_error(getProgramBuildLog(Program.get(), Context),
1113  Error);
1114  return Program;
1115  }
1116 
1117  // Include the main program and compile/link everything together
1118  Plugin.call<PiApiKind::piProgramCompile>(Program.get(), /*num devices =*/1,
1119  &Device, CompileOptions.c_str(), 0,
1120  nullptr, nullptr, nullptr, nullptr);
1121  LinkPrograms.push_back(Program.get());
1122 
1123  RT::PiProgram LinkedProg = nullptr;
1124  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramLink>(
1125  Context->getHandleRef(), /*num devices =*/1, &Device, LinkOptions.c_str(),
1126  LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg);
1127 
1128  // Link program call returns a new program object if all parameters are valid,
1129  // or NULL otherwise. Release the original (user) program.
1130  Program.reset(LinkedProg);
1131  if (Error != PI_SUCCESS) {
1132  if (LinkedProg) {
1133  // A non-trivial error occurred during linkage: get a build log, release
1134  // an incomplete (but valid) LinkedProg, and throw.
1135  throw compile_program_error(getProgramBuildLog(LinkedProg, Context),
1136  Error);
1137  }
1138  Plugin.checkPiResult(Error);
1139  }
1140  return Program;
1141 }
1142 
1143 static ProgramManager::KernelArgMask
1145  const int NBytesForSize = 8;
1146  const int NBitsInElement = 8;
1147  std::uint64_t SizeInBits = 0;
1148  for (int I = 0; I < NBytesForSize; ++I)
1149  SizeInBits |= static_cast<std::uint64_t>(Bytes[I]) << I * NBitsInElement;
1150 
1152  for (std::uint64_t I = 0; I < SizeInBits; ++I) {
1153  std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
1154  Result.push_back(Byte & (1 << (I % NBitsInElement)));
1155  }
1156 
1157  return Result;
1158 }
1159 
1160 void ProgramManager::cacheKernelUsesAssertInfo(OSModuleHandle M,
1161  RTDeviceBinaryImage &Img) {
1162  const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1163  Img.getAssertUsed();
1164  if (AssertUsedRange.isAvailable())
1165  for (const auto &Prop : AssertUsedRange) {
1166  KernelNameWithOSModule Key{Prop->Name, M};
1167  m_KernelUsesAssert.insert(Key);
1168  }
1169 }
1170 
1171 bool ProgramManager::kernelUsesAssert(OSModuleHandle M,
1172  const std::string &KernelName) const {
1173  KernelNameWithOSModule Key{KernelName, M};
1174  return m_KernelUsesAssert.find(Key) != m_KernelUsesAssert.end();
1175 }
1176 
1177 void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
1178  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1179  const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1180 
1181  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1182  pi_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1183  OSModuleHandle M = OSUtil::getOSModuleHandle(RawImg);
1184  const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
1185  const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
1186  auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
1187  static uint32_t SequenceID = 0;
1188 
1189  // Fill the kernel argument mask map
1190  const RTDeviceBinaryImage::PropertyRange &KPOIRange =
1191  Img->getKernelParamOptInfo();
1192  if (KPOIRange.isAvailable()) {
1193  KernelNameToArgMaskMap &ArgMaskMap =
1194  m_EliminatedKernelArgMasks[Img.get()];
1195  for (const auto &Info : KPOIRange)
1196  ArgMaskMap[Info->Name] =
1197  createKernelArgMask(DeviceBinaryProperty(Info).asByteArray());
1198  }
1199 
1200  // Fill maps for kernel bundles
1201  if (EntriesB != EntriesE) {
1202  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1203 
1204  // Register all exported symbols
1205  auto ExportedSymbols = Img->getExportedSymbols();
1206  for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
1207  m_ExportedSymbols.insert(ExportedSymbol->Name);
1208 
1209  m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
1210 
1211  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1212  ++EntriesIt) {
1213 
1214  // Skip creating unique kernel ID if it is a service kernel.
1215  // SYCL service kernels are identified by having
1216  // __sycl_service_kernel__ in the mangled name, primarily as part of
1217  // the namespace of the name type.
1218  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1219  m_ServiceKernels.insert(EntriesIt->name);
1220  continue;
1221  }
1222 
1223  // Skip creating unique kernel ID if it is an exported device
1224  // function. Exported device functions appear in the offload entries
1225  // among kernels, but are identifiable by being listed in properties.
1226  if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1227  continue;
1228 
1229  // ... and create a unique kernel ID for the entry
1230  auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1231  if (It == m_KernelName2KernelIDs.end()) {
1232  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1233  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1234  sycl::kernel_id KernelID =
1235  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1236 
1237  It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name,
1238  KernelID);
1239  }
1240 
1241  m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1242  m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1243  }
1244 
1245  // Sort kernel ids for faster search
1246  std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1247  m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
1248  }
1249 
1250  // TODO: Remove the code below once program manager works trought kernel
1251  // bundles only
1252  // Use the entry information if it's available
1253  if (EntriesB != EntriesE) {
1254  // The kernel sets for any pair of images are either disjoint or
1255  // identical, look up the kernel set using the first kernel name...
1256  StrToKSIdMap &KSIdMap = m_KernelSets[M];
1257  auto KSIdIt = KSIdMap.find(EntriesB->name);
1258  if (KSIdIt != KSIdMap.end()) {
1259  auto &Imgs = m_DeviceImages[KSIdIt->second];
1260  assert(Imgs && "Device image vector should have been already created");
1261  if (DumpImages) {
1262  const bool NeedsSequenceID =
1263  std::any_of(Imgs->begin(), Imgs->end(), [&](auto &I) {
1264  return I->getFormat() == Img->getFormat();
1265  });
1266  dumpImage(*Img, KSIdIt->second, NeedsSequenceID ? ++SequenceID : 0);
1267  }
1268 
1269  cacheKernelUsesAssertInfo(M, *Img);
1270 
1271  Imgs->push_back(std::move(Img));
1272  continue;
1273  }
1274  // ... or create the set first if it hasn't been
1275  KernelSetId KSId = getNextKernelSetId();
1276  {
1277  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1278 
1279  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1280  ++EntriesIt) {
1281  KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
1282  }
1283  }
1284  // ... and initialize associated device_global information
1285  {
1286  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1287 
1288  auto DeviceGlobals = Img->getDeviceGlobals();
1289  for (const pi_device_binary_property &DeviceGlobal : DeviceGlobals) {
1290  ByteArray DeviceGlobalInfo =
1291  DeviceBinaryProperty(DeviceGlobal).asByteArray();
1292 
1293  // The supplied device_global info property is expected to contain:
1294  // * 8 bytes - Size of the property.
1295  // * 4 bytes - Size of the underlying type in the device_global.
1296  // * 4 bytes - 0 if device_global has device_image_scope and any value
1297  // otherwise.
1298  DeviceGlobalInfo.dropBytes(8);
1299  auto [TypeSize, DeviceImageScopeDecorated] =
1300  DeviceGlobalInfo.consume<std::uint32_t, std::uint32_t>();
1301  assert(DeviceGlobalInfo.empty() && "Extra data left!");
1302 
1303  // Give the image pointer as an identifier for the image the
1304  // device-global is associated with.
1305  uintptr_t ImgId = reinterpret_cast<uintptr_t>(Img.get());
1306 
1307  auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1308  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1309  // If it has already been registered we update the information.
1310  ExistingDeviceGlobal->second->initialize(ImgId, KSId, TypeSize,
1311  DeviceImageScopeDecorated);
1312  } else {
1313  // If it has not already been registered we create a new entry.
1314  // Note: Pointer to the device global is not available here, so it
1315  // cannot be set until registration happens.
1316  auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1317  DeviceGlobal->Name, ImgId, KSId, TypeSize,
1318  DeviceImageScopeDecorated);
1319  m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1320  }
1321  }
1322  }
1323  m_DeviceImages[KSId].reset(new std::vector<RTDeviceBinaryImageUPtr>());
1324  cacheKernelUsesAssertInfo(M, *Img);
1325 
1326  if (DumpImages)
1327  dumpImage(*Img, KSId);
1328  m_DeviceImages[KSId]->push_back(std::move(Img));
1329 
1330  continue;
1331  }
1332  // Otherwise assume that the image contains all kernels associated with the
1333  // module
1334  KernelSetId &KSId = m_OSModuleKernelSets[M];
1335  if (KSId == 0)
1336  KSId = getNextKernelSetId();
1337 
1338  auto &Imgs = m_DeviceImages[KSId];
1339  if (!Imgs)
1340  Imgs.reset(new std::vector<RTDeviceBinaryImageUPtr>());
1341 
1342  cacheKernelUsesAssertInfo(M, *Img);
1343 
1344  if (DumpImages)
1345  dumpImage(*Img, KSId);
1346  Imgs->push_back(std::move(Img));
1347  }
1348 }
1349 
1350 void ProgramManager::debugPrintBinaryImages() const {
1351  for (const auto &ImgVecIt : m_DeviceImages) {
1352  std::cerr << " ++++++ Kernel set: " << ImgVecIt.first << "\n";
1353  for (const auto &Img : *ImgVecIt.second)
1354  Img.get()->print();
1355  }
1356 }
1357 
1358 KernelSetId ProgramManager::getNextKernelSetId() const {
1359  // No need for atomic, should be guarded by the caller
1360  static KernelSetId Result = LastKSId;
1361  return ++Result;
1362 }
1363 
1365 ProgramManager::getKernelSetId(OSModuleHandle M,
1366  const std::string &KernelName) const {
1367  // If the env var instructs to use image from a file,
1368  // return the kernel set associated with it
1369  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1370  return SpvFileKSId;
1371  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1372  auto KSIdMapIt = m_KernelSets.find(M);
1373  if (KSIdMapIt != m_KernelSets.end()) {
1374  const StrToKSIdMap &KSIdMap = KSIdMapIt->second;
1375  auto KSIdIt = KSIdMap.find(KernelName);
1376  // If the kernel has been assigned to a kernel set, return it
1377  if (KSIdIt != KSIdMap.end())
1378  return KSIdIt->second;
1379  }
1380  // If no kernel set was found check if there is a kernel set containing
1381  // all kernels in the given module
1382  auto ModuleKSIdIt = m_OSModuleKernelSets.find(M);
1383  if (ModuleKSIdIt != m_OSModuleKernelSets.end())
1384  return ModuleKSIdIt->second;
1385 
1386  throw runtime_error("No kernel named " + KernelName + " was found",
1387  PI_ERROR_INVALID_KERNEL_NAME);
1388 }
1389 
1390 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, KernelSetId KSId,
1391  uint32_t SequenceID) const {
1392  std::string Fname("sycl_");
1393  const pi_device_binary_struct &RawImg = Img.getRawData();
1394  Fname += RawImg.DeviceTargetSpec;
1395  Fname += std::to_string(KSId);
1396  if (SequenceID)
1397  Fname += '_' + std::to_string(SequenceID);
1398  std::string Ext;
1399 
1400  RT::PiDeviceBinaryType Format = Img.getFormat();
1401  if (Format == PI_DEVICE_BINARY_TYPE_SPIRV)
1402  Ext = ".spv";
1403  else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1404  Ext = ".bc";
1405  else
1406  Ext = ".bin";
1407  Fname += Ext;
1408 
1409  std::ofstream F(Fname, std::ios::binary);
1410 
1411  if (!F.is_open()) {
1412  throw runtime_error("Can not write " + Fname, PI_ERROR_UNKNOWN);
1413  }
1414  Img.dump(F);
1415  F.close();
1416 }
1417 
1418 void ProgramManager::flushSpecConstants(const program_impl &Prg,
1419  RT::PiProgram NativePrg,
1420  const RTDeviceBinaryImage *Img) {
1421  if (DbgProgMgr > 2) {
1422  std::cerr << ">>> ProgramManager::flushSpecConstants(" << Prg.get()
1423  << ",...)\n";
1424  }
1425  if (!Prg.hasSetSpecConstants())
1426  return; // nothing to do
1427  pi::PiProgram PrgHandle = Prg.getHandleRef();
1428  // program_impl can't correspond to two different native programs
1429  assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1430  NativePrg = NativePrg ? NativePrg : PrgHandle;
1431 
1432  if (!Img) {
1433  // caller hasn't provided the image object - find it
1434  { // make sure NativePrograms map access is synchronized
1435  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1436  auto It = NativePrograms.find(NativePrg);
1437  if (It == NativePrograms.end())
1438  throw sycl::ext::oneapi::experimental::spec_const_error(
1439  "spec constant is set in a program w/o a binary image",
1440  PI_ERROR_INVALID_OPERATION);
1441  Img = It->second;
1442  }
1443  if (!Img->supportsSpecConstants()) {
1444  if (DbgProgMgr > 0)
1445  std::cerr << ">>> ProgramManager::flushSpecConstants: binary image "
1446  << &Img->getRawData() << " doesn't support spec constants\n";
1447  // This device binary image does not support runtime setting of
1448  // specialization constants; compiler must have generated default values.
1449  // NOTE: Can't throw here, as it would always take place with AOT
1450  //-compiled code. New Khronos 2020 spec should fix this inconsistency.
1451  return;
1452  }
1453  }
1454  Prg.flush_spec_constants(*Img, NativePrg);
1455 }
1456 
1457 // If the kernel is loaded from spv file, it may not include DeviceLib require
1458 // mask, sycl runtime won't know which fallback device libraries are needed. In
1459 // such case, the safest way is to load all fallback device libraries.
1460 uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
1461  const RTDeviceBinaryImage::PropertyRange &DLMRange =
1462  Img.getDeviceLibReqMask();
1463  if (DLMRange.isAvailable())
1464  return DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1465  else
1466  return 0xFFFFFFFF;
1467 }
1468 
1469 // TODO consider another approach with storing the masks in the integration
1470 // header instead.
1471 ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
1472  OSModuleHandle M, pi::PiProgram NativePrg, const std::string &KernelName) {
1473  // If instructed to use a spv file, assume no eliminated arguments.
1474  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1475  return {};
1476 
1477  // Bail out if there are no eliminated kernel arg masks in our images
1478  if (m_EliminatedKernelArgMasks.empty())
1479  return {};
1480 
1481  {
1482  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1483  auto ImgIt = NativePrograms.find(NativePrg);
1484  if (ImgIt != NativePrograms.end()) {
1485  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1486  if (MapIt != m_EliminatedKernelArgMasks.end())
1487  return MapIt->second[KernelName];
1488  return {};
1489  }
1490  }
1491 
1492  // If the program was not cached iterate over all available images looking for
1493  // the requested kernel
1494  for (auto &Elem : m_EliminatedKernelArgMasks) {
1495  auto ArgMask = Elem.second.find(KernelName);
1496  if (ArgMask != Elem.second.end())
1497  return ArgMask->second;
1498  }
1499 
1500  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1501  return {};
1502 }
1503 
1505  auto IsAOTBinary = [](const char *Format) {
1506  return (
1507  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1508  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1509  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1510  };
1511 
1512  // There are only two initial states so far - SPIRV which needs to be compiled
1513  // and linked and fully compiled(AOTed) binary
1514 
1515  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1516 
1517  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1518 }
1519 
1521  const device &Dev) {
1522  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1524  auto &Plugin = DeviceImpl->getPlugin();
1525 
1526  const RT::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1527 
1528  // Call piextDeviceSelectBinary with only one image to check if an image is
1529  // compatible with implementation. The function returns invalid index if no
1530  // device images are compatible.
1531  pi_uint32 SuitableImageID = std::numeric_limits<pi_uint32>::max();
1532  pi_device_binary DevBin =
1533  const_cast<pi_device_binary>(&BinImage->getRawData());
1534  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piextDeviceSelectBinary>(
1535  PIDeviceHandle, &DevBin,
1536  /*num bin images = */ (pi_uint32)1, &SuitableImageID);
1537  if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1538  throw runtime_error("Invalid binary image or device",
1539  PI_ERROR_INVALID_VALUE);
1540 
1541  return (0 == SuitableImageID);
1542 }
1543 
1544 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1545  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1546 
1547  auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1548  if (KernelID == m_KernelName2KernelIDs.end())
1549  throw runtime_error("No kernel found with the specified name",
1550  PI_ERROR_INVALID_KERNEL_NAME);
1551 
1552  return KernelID->second;
1553 }
1554 
1555 bool ProgramManager::hasCompatibleImage(const device &Dev) {
1556  std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1557 
1558  return std::any_of(
1559  m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1560  [&](std::pair<RTDeviceBinaryImage *,
1561  std::shared_ptr<std::vector<kernel_id>>>
1562  Elem) { return compatibleWithDevice(Elem.first, Dev); });
1563 }
1564 
1565 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1566  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1567 
1568  std::vector<sycl::kernel_id> AllKernelIDs;
1569  AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1570  for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1571  AllKernelIDs.push_back(KernelID.second);
1572  }
1573  return AllKernelIDs;
1574 }
1575 
1576 kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
1577  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1578 
1579  auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1580  if (KernelID == m_BuiltInKernelIDs.end()) {
1581  auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1582  auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1583  KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1584  }
1585 
1586  return KernelID->second;
1587 }
1588 
1589 void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
1590  const char *UniqueId) {
1591  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1592 
1593  auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1594  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1595  // Update the existing information and add the entry to the pointer map.
1596  ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1597  m_Ptr2DeviceGlobal.insert(
1598  {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1599  return;
1600  }
1601 
1602  auto EntryUPtr =
1603  std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1604  auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1605  m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1606 }
1607 
1608 std::set<RTDeviceBinaryImage *>
1609 ProgramManager::getRawDeviceImages(const std::vector<kernel_id> &KernelIDs) {
1610  std::set<RTDeviceBinaryImage *> BinImages;
1611  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1612  for (const kernel_id &KID : KernelIDs) {
1613  auto Range = m_KernelIDs2BinImage.equal_range(KID);
1614  for (auto It = Range.first, End = Range.second; It != End; ++It)
1615  BinImages.insert(It->second);
1616  }
1617  return BinImages;
1618 }
1619 
1621 ProgramManager::getDeviceGlobalEntry(const void *DeviceGlobalPtr) {
1622  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1623  auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1624  assert(Entry != m_Ptr2DeviceGlobal.end() && "Device global entry not found");
1625  return Entry->second;
1626 }
1627 
1628 std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
1629  const std::vector<std::string> &UniqueIds,
1630  bool ExcludeDeviceImageScopeDecorated) {
1631  std::vector<DeviceGlobalMapEntry *> FoundEntries;
1632  FoundEntries.reserve(UniqueIds.size());
1633 
1634  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1635  for (const std::string &UniqueId : UniqueIds) {
1636  auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1637  assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1638  "Device global not found in map.");
1639  if (!ExcludeDeviceImageScopeDecorated ||
1640  !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1641  FoundEntries.push_back(DeviceGlobalEntry->second.get());
1642  }
1643  return FoundEntries;
1644 }
1645 
1646 device_image_plain ProgramManager::getDeviceImageFromBinaryImage(
1647  RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) {
1648  const bundle_state ImgState = getBinImageState(BinImage);
1649 
1650  assert(compatibleWithDevice(BinImage, Dev));
1651 
1652  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1653  // Collect kernel names for the image.
1654  {
1655  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1656  KernelIDs = m_BinImg2KernelIDs[BinImage];
1657  }
1658 
1659  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1660  BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1661  /*PIProgram=*/nullptr);
1662 
1663  return createSyclObjFromImpl<device_image_plain>(Impl);
1664 }
1665 
1666 std::vector<device_image_plain>
1667 ProgramManager::getSYCLDeviceImagesWithCompatibleState(
1668  const context &Ctx, const std::vector<device> &Devs,
1669  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {
1670 
1671  // Collect unique raw device images taking into account kernel ids passed
1672  // TODO: Can we avoid repacking?
1673  std::set<RTDeviceBinaryImage *> BinImages;
1674  if (!KernelIDs.empty()) {
1675  for (const auto &KID : KernelIDs) {
1676  bool isCompatibleWithAtLeastOneDev =
1677  std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) {
1678  return sycl::is_compatible({KID}, Dev);
1679  });
1680  if (!isCompatibleWithAtLeastOneDev)
1681  throw sycl::exception(
1682  make_error_code(errc::invalid),
1683  "Kernel is incompatible with all devices in devs");
1684  }
1685  BinImages = getRawDeviceImages(KernelIDs);
1686  } else {
1687  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1688  for (auto &ImagesSets : m_DeviceImages) {
1689  auto &ImagesUPtrs = *ImagesSets.second.get();
1690  for (auto &ImageUPtr : ImagesUPtrs)
1691  BinImages.insert(ImageUPtr.get());
1692  }
1693  }
1694  assert(BinImages.size() > 0 && "Expected to find at least one device image");
1695 
1696  // Ignore images with incompatible state. Image is considered compatible
1697  // with a target state if an image is already in the target state or can
1698  // be brought to target state by compiling/linking/building.
1699  //
1700  // Example: an image in "executable" state is not compatible with
1701  // "input" target state - there is no operation to convert the image it
1702  // to "input" state. An image in "input" state is compatible with
1703  // "executable" target state because it can be built to get into
1704  // "executable" state.
1705  for (auto It = BinImages.begin(); It != BinImages.end();) {
1706  if (getBinImageState(*It) > TargetState)
1707  It = BinImages.erase(It);
1708  else
1709  ++It;
1710  }
1711 
1712  std::vector<device_image_plain> SYCLDeviceImages;
1713 
1714  // If a non-input state is requested, we can filter out some compatible
1715  // images and return only those with the highest compatible state for each
1716  // device-kernel pair. This map tracks how many kernel-device pairs need each
1717  // image, so that any unneeded ones are skipped.
1718  // TODO this has no effect if the requested state is input, consider having
1719  // a separate branch for that case to avoid unnecessary tracking work.
1720  struct DeviceBinaryImageInfo {
1721  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1722  bundle_state State = bundle_state::input;
1723  int RequirementCounter = 0;
1724  };
1725  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1726 
1727  for (const sycl::device &Dev : Devs) {
1728  // Track the highest image state for each requested kernel.
1729  using StateImagesPairT =
1730  std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1731  using KernelImageMapT =
1732  std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1733  KernelImageMapT KernelImageMap;
1734  if (!KernelIDs.empty())
1735  for (const kernel_id &KernelID : KernelIDs)
1736  KernelImageMap.insert({KernelID, {}});
1737 
1738  for (RTDeviceBinaryImage *BinImage : BinImages) {
1739  if (!compatibleWithDevice(BinImage, Dev) ||
1740  !doesDevSupportDeviceRequirements(Dev, *BinImage))
1741  continue;
1742 
1743  auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1744  DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1745  if (InsertRes.second) {
1746  ImgInfo.State = getBinImageState(BinImage);
1747  // Collect kernel names for the image
1748  {
1749  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1750  ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1751  }
1752  }
1753  const bundle_state ImgState = ImgInfo.State;
1754  const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1755  ImgInfo.KernelIDs;
1756  int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1757 
1758  // If the image does not contain any non-service kernels we can skip it.
1759  if (!ImageKernelIDs || ImageKernelIDs->empty())
1760  continue;
1761 
1762  // Update tracked information.
1763  for (kernel_id &KernelID : *ImageKernelIDs) {
1764  StateImagesPairT *StateImagesPair;
1765  // If only specific kernels are requested, ignore the rest.
1766  if (!KernelIDs.empty()) {
1767  auto It = KernelImageMap.find(KernelID);
1768  if (It == KernelImageMap.end())
1769  continue;
1770  StateImagesPair = &It->second;
1771  } else
1772  StateImagesPair = &KernelImageMap[KernelID];
1773 
1774  auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1775 
1776  if (KernelImages.empty()) {
1777  KernelImagesState = ImgState;
1778  KernelImages.push_back(BinImage);
1779  ++ImgRequirementCounter;
1780  } else if (KernelImagesState < ImgState) {
1781  for (RTDeviceBinaryImage *Img : KernelImages) {
1782  auto It = ImageInfoMap.find(Img);
1783  assert(It != ImageInfoMap.end());
1784  assert(It->second.RequirementCounter > 0);
1785  --(It->second.RequirementCounter);
1786  }
1787  KernelImages.clear();
1788  KernelImages.push_back(BinImage);
1789  KernelImagesState = ImgState;
1790  ++ImgRequirementCounter;
1791  } else if (KernelImagesState == ImgState) {
1792  KernelImages.push_back(BinImage);
1793  ++ImgRequirementCounter;
1794  }
1795  }
1796  }
1797  }
1798 
1799  for (const auto &ImgInfoPair : ImageInfoMap) {
1800  if (ImgInfoPair.second.RequirementCounter == 0)
1801  continue;
1802 
1803  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1804  ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1805  ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
1806 
1807  SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1808  }
1809 
1810  return SYCLDeviceImages;
1811 }
1812 
1813 void ProgramManager::bringSYCLDeviceImagesToState(
1814  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
1815 
1816  for (device_image_plain &DevImage : DeviceImages) {
1817  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
1818 
1819  switch (TargetState) {
1820  case bundle_state::input:
1821  // Do nothing since there is no state which can be upgraded to the input.
1822  assert(DevImageState == bundle_state::input);
1823  break;
1824  case bundle_state::object:
1825  if (DevImageState == bundle_state::input) {
1826  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1827  /*PropList=*/{});
1828  break;
1829  }
1830  // Device image is expected to be object state then.
1831  assert(DevImageState == bundle_state::object);
1832  break;
1833  case bundle_state::executable: {
1834  switch (DevImageState) {
1835  case bundle_state::input:
1836  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1837  /*PropList=*/{});
1838  break;
1839  case bundle_state::object: {
1840  std::vector<device_image_plain> LinkedDevImages =
1841  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
1842  /*PropList=*/{});
1843  // Since only one device image is passed here one output device image is
1844  // expected
1845  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
1846  DevImage = LinkedDevImages[0];
1847  break;
1848  }
1849  case bundle_state::executable:
1850  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1851  /*PropList=*/{});
1852  break;
1853  }
1854  break;
1855  }
1856  }
1857  }
1858 }
1859 
1860 std::vector<device_image_plain>
1861 ProgramManager::getSYCLDeviceImages(const context &Ctx,
1862  const std::vector<device> &Devs,
1863  bundle_state TargetState) {
1864  // Collect device images with compatible state
1865  std::vector<device_image_plain> DeviceImages =
1866  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1867  // Bring device images with compatible state to desired state.
1868  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1869  return DeviceImages;
1870 }
1871 
1872 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1873  const context &Ctx, const std::vector<device> &Devs,
1874  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
1875  // Collect device images with compatible state
1876  std::vector<device_image_plain> DeviceImages =
1877  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1878 
1879  // Filter out images that are rejected by Selector
1880  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1881  [&Selector](const device_image_plain &Image) {
1882  return !Selector(getSyclObjImpl(Image));
1883  });
1884  DeviceImages.erase(It, DeviceImages.end());
1885 
1886  // The spec says that the function should not call online compiler or linker
1887  // to translate device images into target state
1888  return DeviceImages;
1889 }
1890 
1891 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1892  const context &Ctx, const std::vector<device> &Devs,
1893  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1894  // Fast path for when no kernel IDs are requested
1895  if (KernelIDs.empty())
1896  return {};
1897 
1898  {
1899  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1900 
1901  for (auto &It : m_BuiltInKernelIDs) {
1902  if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1903  KernelIDs.end())
1904  throw sycl::exception(make_error_code(errc::kernel_argument),
1905  "Attempting to use a built-in kernel. They are "
1906  "not fully supported");
1907  }
1908  }
1909 
1910  // Collect device images with compatible state
1911  std::vector<device_image_plain> DeviceImages =
1912  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1913 
1914  // Bring device images with compatible state to desired state.
1915  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1916  return DeviceImages;
1917 }
1918 
1919 static void
1920 setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
1921  RT::PiProgram Prog, const plugin &Plugin) {
1922  // Set ITT annotation specialization constant if needed.
1923  enableITTAnnotationsIfNeeded(Prog, Plugin);
1924 
1925  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
1926  const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
1927  &SpecConstData = InputImpl->get_spec_const_data_ref();
1928  const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
1929 
1930  // Set all specialization IDs from descriptors in the input device image.
1931  for (const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
1932  std::ignore = SpecConstNames;
1933  for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) {
1934  if (SpecIDDesc.IsSet) {
1936  Prog, SpecIDDesc.ID, SpecIDDesc.Size,
1937  SpecConsts.data() + SpecIDDesc.BlobOffset);
1938  }
1939  }
1940  }
1941 }
1942 
1943 device_image_plain
1945  const std::vector<device> &Devs,
1946  const property_list &) {
1947 
1948  // TODO: Extract compile options from property list once the Spec clarifies
1949  // how they can be passed.
1950 
1951  // TODO: Probably we could have cached compiled device images.
1952  const std::shared_ptr<device_image_impl> &InputImpl =
1953  getSyclObjImpl(DeviceImage);
1954 
1955  const detail::plugin &Plugin =
1956  getSyclObjImpl(InputImpl->get_context())->getPlugin();
1957 
1958  // TODO: Add support for creating non-SPIRV programs from multiple devices.
1959  if (InputImpl->get_bin_image_ref()->getFormat() !=
1961  Devs.size() > 1)
1962  sycl::runtime_error(
1963  "Creating a program from AOT binary for multiple device is not "
1964  "supported",
1965  PI_ERROR_INVALID_OPERATION);
1966 
1967  // Device is not used when creating program from SPIRV, so passing only one
1968  // device is OK.
1969  RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
1970  InputImpl->get_context(), Devs[0]);
1971 
1972  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1973  setSpecializationConstants(InputImpl, Prog, Plugin);
1974 
1975  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
1976  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
1977  bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
1978  InputImpl->get_spec_const_data_ref(),
1979  InputImpl->get_spec_const_blob_ref());
1980 
1981  std::vector<pi_device> PIDevices;
1982  PIDevices.reserve(Devs.size());
1983  for (const device &Dev : Devs)
1984  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
1985 
1986  // TODO: Handle zero sized Device list.
1987  std::string CompileOptions;
1988  applyCompileOptionsFromEnvironment(CompileOptions);
1990  CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
1992  ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(),
1993  PIDevices.data(), CompileOptions.c_str(),
1994  /*num_input_headers=*/0, /*input_headers=*/nullptr,
1995  /*header_include_names=*/nullptr,
1996  /*pfn_notify=*/nullptr, /*user_data*/ nullptr);
1997  if (Error != PI_SUCCESS)
1998  throw sycl::exception(
2000  getProgramBuildLog(ObjectImpl->get_program_ref(),
2001  getSyclObjImpl(ObjectImpl->get_context())));
2002 
2003  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2004 }
2005 
2006 std::vector<device_image_plain>
2007 ProgramManager::link(const std::vector<device_image_plain> &DeviceImages,
2008  const std::vector<device> &Devs,
2009  const property_list &PropList) {
2010  (void)PropList;
2011 
2012  std::vector<pi_program> PIPrograms;
2013  PIPrograms.reserve(DeviceImages.size());
2014  for (const device_image_plain &DeviceImage : DeviceImages)
2015  PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref());
2016 
2017  std::vector<pi_device> PIDevices;
2018  PIDevices.reserve(Devs.size());
2019  for (const device &Dev : Devs)
2020  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2021 
2022  std::string LinkOptionsStr;
2023  applyLinkOptionsFromEnvironment(LinkOptionsStr);
2024  if (LinkOptionsStr.empty()) {
2025  for (const device_image_plain &DeviceImage : DeviceImages) {
2026  const std::shared_ptr<device_image_impl> &InputImpl =
2027  getSyclObjImpl(DeviceImage);
2028  appendLinkOptionsFromImage(LinkOptionsStr,
2029  *(InputImpl->get_bin_image_ref()));
2030  }
2031  }
2032  const context &Context = getSyclObjImpl(DeviceImages[0])->get_context();
2033  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2034  const detail::plugin &Plugin = ContextImpl->getPlugin();
2035 
2036  RT::PiProgram LinkedProg = nullptr;
2038  ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2039  /*options=*/LinkOptionsStr.c_str(), PIPrograms.size(), PIPrograms.data(),
2040  /*pfn_notify=*/nullptr,
2041  /*user_data=*/nullptr, &LinkedProg);
2042 
2043  if (Error != PI_SUCCESS) {
2044  if (LinkedProg) {
2045  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2046  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
2047  }
2048  Plugin.reportPiError(Error, "link()");
2049  }
2050 
2051  std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
2052  std::vector<unsigned char> NewSpecConstBlob;
2053  device_image_impl::SpecConstMapT NewSpecConstMap;
2054  for (const device_image_plain &DeviceImage : DeviceImages) {
2055  std::shared_ptr<device_image_impl> DeviceImageImpl =
2056  getSyclObjImpl(DeviceImage);
2057 
2058  // Duplicates are not expected here, otherwise piProgramLink should fail
2059  KernelIDs->insert(KernelIDs->end(),
2060  DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2061  DeviceImageImpl->get_kernel_ids_ptr()->end());
2062 
2063  // To be able to answer queries about specialziation constants, the new
2064  // device image should have the specialization constants from all the linked
2065  // images.
2066  {
2067  const std::lock_guard<std::mutex> SpecConstLock(
2068  DeviceImageImpl->get_spec_const_data_lock());
2069 
2070  // Copy all map entries to the new map. Since the blob will be copied to
2071  // the end of the new blob we need to move the blob offset of each entry.
2072  for (const auto &SpecConstIt :
2073  DeviceImageImpl->get_spec_const_data_ref()) {
2074  std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2075  NewSpecConstMap[SpecConstIt.first];
2076  assert(NewDescEntries.empty() &&
2077  "Specialization constant already exists in the map.");
2078  NewDescEntries.reserve(SpecConstIt.second.size());
2079  for (const device_image_impl::SpecConstDescT &SpecConstDesc :
2080  SpecConstIt.second) {
2081  device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc;
2082  NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size();
2083  NewDescEntries.push_back(std::move(NewSpecConstDesc));
2084  }
2085  }
2086 
2087  // Copy the blob from the device image into the new blob. This moves the
2088  // offsets of the following blobs.
2089  NewSpecConstBlob.insert(
2090  NewSpecConstBlob.end(),
2091  DeviceImageImpl->get_spec_const_blob_ref().begin(),
2092  DeviceImageImpl->get_spec_const_blob_ref().end());
2093  }
2094  }
2095  // device_image_impl expects kernel ids to be sorted for fast search
2096  std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});
2097 
2098  DeviceImageImplPtr ExecutableImpl =
2099  std::make_shared<detail::device_image_impl>(
2100  /*BinImage=*/nullptr, Context, Devs, bundle_state::executable,
2101  std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap),
2102  std::move(NewSpecConstBlob));
2103 
2104  // TODO: Make multiple sets of device images organized by devices they are
2105  // compiled for.
2106  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2107 }
2108 
2109 // The function duplicates most of the code from existing getBuiltPIProgram.
2110 // The differences are:
2111 // Different API - uses different objects to extract required info
2112 // Supports caching of a program built for multiple devices
2114  const std::vector<device> &Devs,
2115  const property_list &PropList) {
2116  (void)PropList;
2117 
2118  const std::shared_ptr<device_image_impl> &InputImpl =
2119  getSyclObjImpl(DeviceImage);
2120 
2121  const context Context = InputImpl->get_context();
2122 
2123  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2124 
2125  using PiProgramT = KernelProgramCache::PiProgramT;
2126 
2127  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
2128 
2129  std::string CompileOpts;
2130  std::string LinkOpts;
2131  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
2132 
2133  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
2134  const RTDeviceBinaryImage &Img = *ImgPtr;
2135 
2136  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2137 
2138  // TODO: Unify this code with getBuiltPIProgram
2139  auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2140  &InputImpl, SpecConsts] {
2141  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2142  const detail::plugin &Plugin = ContextImpl->getPlugin();
2143  applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Plugin);
2144 
2145  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2146  if (InputImpl->get_bin_image_ref()->getFormat() !=
2148  Devs.size() > 1)
2149  sycl::runtime_error(
2150  "Creating a program from AOT binary for multiple device is not "
2151  "supported",
2152  PI_ERROR_INVALID_OPERATION);
2153 
2154  // Device is not used when creating program from SPIRV, so passing only one
2155  // device is OK.
2156  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2157  Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2158 
2159  if (!DeviceCodeWasInCache &&
2160  InputImpl->get_bin_image_ref()->supportsSpecConstants())
2161  setSpecializationConstants(InputImpl, NativePrg, Plugin);
2162 
2163  ProgramPtr ProgramManaged(
2164  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
2165 
2166  // Link a fallback implementation of device libraries if they are not
2167  // supported by a device compiler.
2168  // Pre-compiled programs are supposed to be already linked.
2169  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
2170  // no fallback device library will be linked.
2171  uint32_t DeviceLibReqMask = 0;
2172  if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
2174  DeviceLibReqMask = getDeviceLibReqMask(Img);
2175 
2176  ProgramPtr BuiltProgram =
2177  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2178  getRawSyclObjImpl(Devs[0])->getHandleRef(), DeviceLibReqMask);
2179 
2180  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
2181 
2182  {
2183  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2184  NativePrograms[BuiltProgram.get()] = &Img;
2185  }
2186 
2187  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2188 
2189  // Save program to persistent cache if it is not there
2190  if (!DeviceCodeWasInCache)
2191  PersistentDeviceCodeCache::putItemToDisc(
2192  Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
2193 
2194  return BuiltProgram.release();
2195  };
2196 
2197  uint32_t ImgId = Img.getImageID();
2198  const RT::PiDevice PiDevice = getRawSyclObjImpl(Devs[0])->getHandleRef();
2199  auto CacheKey =
2200  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
2201  std::make_pair(PiDevice, CompileOpts + LinkOpts));
2202 
2203  // CacheKey is captured by reference so when we overwrite it later we can
2204  // reuse this function.
2205  auto GetCachedBuildF = [&Cache, &CacheKey]() {
2206  return Cache.getOrInsertProgram(CacheKey);
2207  };
2208 
2209  // TODO: Throw SYCL2020 style exception
2210  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
2211  Cache, GetCachedBuildF, BuildF);
2212  // getOrBuild is not supposed to return nullptr
2213  assert(BuildResult != nullptr && "Invalid build result");
2214 
2215  RT::PiProgram ResProgram = BuildResult->Ptr.load();
2216 
2217  // Cache supports key with once device only, but here we have multiple
2218  // devices a program is built for, so add the program to the cache for all
2219  // other devices.
2220  const detail::plugin &Plugin = ContextImpl->getPlugin();
2221  auto CacheOtherDevices = [ResProgram, &Plugin]() {
2222  Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
2223  return ResProgram;
2224  };
2225 
2226  // The program for device "0" is already added to the cache during the first
2227  // call to getOrBuild, so starting with "1"
2228  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2229  const RT::PiDevice PiDeviceAdd =
2230  getRawSyclObjImpl(Devs[Idx])->getHandleRef();
2231 
2232  // Change device in the cache key to reduce copying of spec const data.
2233  CacheKey.second.first = PiDeviceAdd;
2234  getOrBuild<PiProgramT, compile_program_error>(Cache, GetCachedBuildF,
2235  CacheOtherDevices);
2236  // getOrBuild is not supposed to return nullptr
2237  assert(BuildResult != nullptr && "Invalid build result");
2238  }
2239 
2240  // devive_image_impl shares ownership of PIProgram with, at least, program
2241  // cache. The ref counter will be descremented in the destructor of
2242  // device_image_impl
2243  Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
2244 
2245  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2246  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2247  InputImpl->get_kernel_ids_ptr(), ResProgram,
2248  InputImpl->get_spec_const_data_ref(),
2249  InputImpl->get_spec_const_blob_ref());
2250 
2251  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2252 }
2253 
2254 std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
2255  const context &Context, const std::string &KernelName,
2256  const property_list &PropList, RT::PiProgram Program) {
2257 
2258  (void)PropList;
2259 
2260  const ContextImplPtr Ctx = getSyclObjImpl(Context);
2261 
2262  using PiKernelT = KernelProgramCache::PiKernelT;
2263 
2264  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
2265 
2266  auto BuildF = [&Program, &KernelName, &Ctx] {
2267  PiKernelT *Result = nullptr;
2268 
2269  const detail::plugin &Plugin = Ctx->getPlugin();
2270  Plugin.call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
2271  &Result);
2272 
2274  sizeof(pi_bool), &PI_TRUE);
2275 
2276  return Result;
2277  };
2278 
2279  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2280  return Cache.getOrInsertKernel(Program, KernelName);
2281  };
2282 
2283  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
2284  Cache, GetCachedBuildF, BuildF);
2285  // getOrBuild is not supposed to return nullptr
2286  assert(BuildResult != nullptr && "Invalid build result");
2287  return std::make_pair(BuildResult->Ptr.load(),
2288  &(BuildResult->MBuildResultMutex));
2289 }
2290 
2292  const RTDeviceBinaryImage &Img) {
2293  auto getPropIt = [&Img](const std::string &PropName) {
2294  const RTDeviceBinaryImage::PropertyRange &PropRange =
2295  Img.getDeviceRequirements();
2297  PropRange.begin(), PropRange.end(),
2299  return (*Prop)->Name == PropName;
2300  });
2301  return (PropIt == PropRange.end())
2302  ? std::nullopt
2303  : std::optional<
2305  };
2306 
2307  auto AspectsPropIt = getPropIt("aspects");
2308  auto ReqdWGSizePropIt = getPropIt("reqd_work_group_size");
2309 
2310  if (!AspectsPropIt && !ReqdWGSizePropIt)
2311  return true;
2312 
2313  // Checking if device supports defined aspects
2314  if (AspectsPropIt) {
2315  ByteArray Aspects =
2316  DeviceBinaryProperty(*(AspectsPropIt.value())).asByteArray();
2317  // Drop 8 bytes describing the size of the byte array.
2318  Aspects.dropBytes(8);
2319  while (!Aspects.empty()) {
2320  aspect Aspect = Aspects.consume<aspect>();
2321  // Strict check for fp64 is disabled temporarily to avoid confusion.
2322  if (!Dev.has(Aspect))
2323  return false;
2324  }
2325  }
2326 
2327  // Checking if device supports defined required work group size
2328  if (ReqdWGSizePropIt) {
2329  ByteArray ReqdWGSize =
2330  DeviceBinaryProperty(*(ReqdWGSizePropIt.value())).asByteArray();
2331  // Drop 8 bytes describing the size of the byte array.
2332  ReqdWGSize.dropBytes(8);
2333  int ReqdWGSizeAllDimsTotal = 1;
2334  std::vector<int> ReqdWGSizeVec;
2335  int Dims = 0;
2336  while (!ReqdWGSize.empty()) {
2337  int SingleDimSize = ReqdWGSize.consume<int>();
2338  ReqdWGSizeAllDimsTotal *= SingleDimSize;
2339  ReqdWGSizeVec.push_back(SingleDimSize);
2340  Dims++;
2341  }
2342  if (static_cast<size_t>(ReqdWGSizeAllDimsTotal) >
2343  Dev.get_info<info::device::max_work_group_size>())
2344  return false;
2345  // Creating std::variant to call max_work_item_sizes one time to avoid
2346  // performance drop
2347  std::variant<id<1>, id<2>, id<3>> MaxWorkItemSizesVariant;
2348  if (Dims == 1)
2349  MaxWorkItemSizesVariant =
2351  else if (Dims == 2)
2352  MaxWorkItemSizesVariant =
2354  else // (Dims == 3)
2355  MaxWorkItemSizesVariant =
2357  for (int i = 0; i < Dims; i++) {
2358  // Extracting value from std::variant to avoid dealing with type-safety
2359  // issues after that
2360  if (Dims == 1) {
2361  // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
2362  if (static_cast<size_t>(ReqdWGSizeVec[i]) >
2363  std::get<id<1>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2364  return false;
2365  } else if (Dims == 2) {
2366  if (static_cast<size_t>(ReqdWGSizeVec[i]) >
2367  std::get<id<2>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2368  return false;
2369  } else // (Dims == 3)
2370  if (static_cast<size_t>(ReqdWGSizeVec[i]) >
2371  std::get<id<3>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2372  return false;
2373  }
2374  }
2375  return true;
2376 }
2377 
2378 } // namespace detail
2379 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
2380 } // namespace sycl
2381 
2383  sycl::detail::ProgramManager::getInstance().addImages(desc);
2384 }
2385 
2386 // Executed as a part of current module's (.exe, .dll) static initialization
2388  (void)desc;
2389  // TODO implement the function
2390 }
sycl::_V1::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:723
sycl::_V1::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: helpers.cpp:21
sycl::_V1::detail::SpvFileKSId
constexpr KernelSetId SpvFileKSId
Definition: common.hpp:446
PI_DEVICE_BINARY_TYPE_NONE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
Definition: pi.h:762
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_esimd_emulator.cpp:1366
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:24
aspects.hpp
sycl::_V1::detail::emitBuiltProgramInfo
static void emitBuiltProgramInfo(const pi_program &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
Definition: program_manager.cpp:477
event_impl.hpp
_pi_offload_entry_struct
Definition: pi.h:719
sycl::_V1::detail::pi::PiDeviceBinaryType
::pi_device_binary_type PiDeviceBinaryType
Definition: pi.hpp:127
sycl::_V1::backend
backend
Definition: backend_types.hpp:21
sycl::_V1::detail::RTDeviceBinaryImage::getRawData
const pi_device_binary_struct & getRawData() const
Definition: device_binary_image.hpp:157
sycl::_V1::detail::device_image_impl::SpecConstMapT
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
Definition: device_image_impl.hpp:56
pi_bool
pi_uint32 pi_bool
Definition: pi.h:131
context_impl.hpp
sycl::_V1::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:641
sycl::_V1::detail::device_image_impl::SpecConstDescT
Definition: device_image_impl.hpp:48
pi_device_binaries_struct::DeviceBinaries
pi_device_binary DeviceBinaries
Device binaries data.
Definition: pi.h:947
sycl::_V1::detail::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
sycl::_V1::detail::RTDeviceBinaryImage::getSize
size_t getSize() const
Definition: device_binary_image.hpp:162
PI_DEVICE_BINARY_TYPE_SPIRV
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
Definition: pi.h:767
type_traits.hpp
PI_DEVICE_INFO_NAME
@ PI_DEVICE_INFO_NAME
Definition: pi.h:272
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:92
sycl::_V1::detail::doesDevSupportDeviceRequirements
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:2291
pi_device_binary_struct::DeviceTargetSpec
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:862
config.hpp
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:1230
sycl::_V1::detail::DeviceLibExtensionStrs
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
Definition: program_manager.cpp:824
PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: pi.h:769
stl.hpp
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceLibReqMask
const PropertyRange & getDeviceLibReqMask() const
Definition: device_binary_image.hpp:214
device.hpp
sycl::_V1::errc::feature_not_supported
@ feature_not_supported
sycl::_V1::detail::DeviceImageImplPtr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
Definition: kernel_bundle.hpp:72
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1353
sycl::_V1::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:676
sycl::_V1::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:59
sycl::_V1::detail::getDeviceLibFilename
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
Definition: program_manager.cpp:810
pi_device_binary_struct::BinaryStart
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: pi.h:874
sycl::_V1::detail::ByteArray::empty
bool empty() const
Definition: device_binary_image.hpp:48
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceRequirements
const PropertyRange & getDeviceRequirements() const
Definition: device_binary_image.hpp:222
sycl::_V1::context::get_devices
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:139
sycl::_V1::detail::DeviceBinaryProperty::asByteArray
ByteArray asByteArray() const
Definition: device_binary_image.cpp:69
sycl::_V1::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:124
context.hpp
sycl::_V1::detail::SYCLConfig
Definition: config.hpp:110
sycl::_V1::detail::program_impl::get
cl_program get() const
Returns a valid cl_program instance.
Definition: program_impl.cpp:216
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:867
os_util.hpp
sycl::_V1::detail::DeviceGlobalMapEntry
Definition: device_global_map_entry.hpp:81
sycl::_V1::detail::program_impl::get_build_options
std::string get_build_options() const
Returns the compile, link, or build options, from whichever of those operations was performed most re...
Definition: program_impl.hpp:304
sycl::_V1::detail::KernelProgramCache::saveKernel
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
Definition: kernel_program_cache.hpp:180
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:939
pi_device_binary_struct::BinaryEnd
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: pi.h:876
sycl::_V1::info::device::max_work_item_sizes
Definition: info_desc.hpp:106
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::detail::applyCompileOptionsFromEnvironment
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
Definition: program_manager.cpp:425
sycl::_V1::detail::PiInfoCode
Definition: info_desc_helpers.hpp:18
sycl::_V1::detail::program_impl::hasSetSpecConstants
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
Definition: program_impl.hpp:335
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1377
device_impl.hpp
sycl::_V1::detail::KernelProgramCache::PiKernelT
std::remove_pointer< RT::PiKernel >::type PiKernelT
Definition: kernel_program_cache.hpp:87
sycl::_V1::detail::plugin::getBackend
backend getBackend(void) const
Definition: plugin.hpp:229
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:350
sycl::_V1::detail::make_tuple
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:36
queue_impl.hpp
sycl::_V1::detail::getRawSyclObjImpl
detail::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
Definition: common.hpp:310
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::size
size_t size() const
Definition: device_binary_image.hpp:116
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:793
PI_KERNEL_INFO_PROGRAM
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:381
sycl::_V1::detail::KernelProgramCache::BuildState
BuildState
Denotes the state of a build.
Definition: kernel_program_cache.hpp:43
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:17
sycl::_V1::detail::KernelProgramCache::BuildResult
Denotes pointer to some entity with its general state and build error.
Definition: kernel_program_cache.hpp:49
sycl::_V1::exception::get_cl_code
cl_int get_cl_code() const
Definition: exception.cpp:85
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::ConstIterator
Definition: device_binary_image.hpp:90
sycl::_V1::detail::program_impl::flush_spec_constants
void flush_spec_constants(const RTDeviceBinaryImage &Img, RT::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
Definition: program_impl.cpp:525
std::clog
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
sycl::_V1::detail::getDeviceLibPrograms
static std::vector< RT::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:997
sycl::_V1::detail::KernelProgramCache::PiProgramT
std::remove_pointer< RT::PiProgram >::type PiProgramT
Definition: kernel_program_cache.hpp:71
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1325
compile_program_error
sycl::_V1::detail::createBinaryProgram
static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
Definition: program_manager.cpp:70
sycl::_V1::detail::DeviceBinaryProperty::asUint32
pi_uint32 asUint32() const
Definition: device_binary_image.cpp:61
sycl::_V1::detail::applyOptionsFromImage
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const detail::plugin &Plugin)
Definition: program_manager.cpp:416
sycl::_V1::detail::ByteArray
Definition: device_binary_image.hpp:23
sycl::_V1::detail::isDeviceLibRequired
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:989
sycl::_V1::detail::DeviceBinaryProperty
Definition: device_binary_image.hpp:64
sycl::_V1::detail::getDeviceLibExtensionStr
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
Definition: program_manager.cpp:839
sycl::_V1::detail::RTDeviceBinaryImage::getLinkOptions
const char * getLinkOptions() const
Definition: device_binary_image.hpp:172
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceGlobals
const PropertyRange & getDeviceGlobals() const
Definition: device_binary_image.hpp:221
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
sycl::_V1::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
sycl::_V1::detail::LessByHash
Definition: device_image_impl.hpp:35
sycl::_V1::detail::DbgProgMgr
static constexpr int DbgProgMgr
Definition: program_manager.cpp:50
pi_uint32
uint32_t pi_uint32
Definition: pi.h:129
sycl::_V1::detail::RTDeviceBinaryImage::supportsSpecConstants
bool supportsSpecConstants() const
Definition: device_binary_image.hpp:153
sycl::_V1::detail::isDeviceBinaryTypeSupported
static bool isDeviceBinaryTypeSupported(const context &C, RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:218
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::end
ConstIterator end() const
Definition: device_binary_image.hpp:115
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:842
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2382
program_impl.hpp
sycl::_V1::detail::createSpirvProgram
static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
Definition: program_manager.cpp:98
sycl::_V1::detail::loadDeviceLib
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, RT::PiProgram &Prog)
Definition: program_manager.cpp:764
sycl::_V1::detail::plugin::reportPiError
void reportPiError(RT::PiResult pi_result, const char *context) const
Definition: plugin.hpp:149
sycl::_V1::detail::pi::PiResult
::pi_result PiResult
Definition: pi.hpp:122
sycl::_V1::detail::pi::getBinaryImageFormat
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: pi.cpp:632
sycl::_V1::detail::compatibleWithDevice
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
Definition: program_manager.cpp:1520
pi_device_binaries_struct::NumDeviceBinaries
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: pi.h:945
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_esimd_emulator.cpp:1307
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_esimd_emulator.cpp:2008
PI_DEVICE_BINARY_TYPE_NATIVE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
Definition: pi.h:764
sycl::_V1::detail::program_impl::getHandleRef
RT::PiProgram & getHandleRef()
Definition: program_impl.hpp:134
sycl::_V1::detail::getFormatStr
static const char * getFormatStr(RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:262
sycl::_V1::detail::RTDeviceBinaryImage::getCompileOptions
const char * getCompileOptions() const
Definition: device_binary_image.hpp:167
sycl::_V1::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:44
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_esimd_emulator.cpp:592
sycl::_V1::exception
Definition: exception.hpp:64
sycl::_V1::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:130
common.hpp
std::cerr
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
global_handler.hpp
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
sycl::_V1::detail::KernelProgramCache::notifyAllBuild
void notifyAllBuild(BuildResult< T > &BR) const
Definition: kernel_program_cache.hpp:165
sycl::_V1::detail::KernelProgramCache::getOrInsertKernel
std::pair< KernelWithBuildStateT *, bool > getOrInsertKernel(RT::PiProgram Program, const std::string &KernelName)
Definition: kernel_program_cache.hpp:132
sycl::_V1::detail::UseSpvEnv
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
_pi_device_binary_property_struct
Definition: pi.h:741
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:760
sycl::_V1::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
sycl::_V1::detail::RTDeviceBinaryImage::getProgramMetadata
const PropertyRange & getProgramMetadata() const
Definition: device_binary_image.hpp:219
program_manager.hpp
sycl::_V1::detail::appendCompileOptionsFromImage
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const detail::plugin &Plugin)
Definition: program_manager.cpp:362
sycl::_V1::detail::pi::PiKernel
::pi_kernel PiKernel
Definition: pi.hpp:131
persistent_device_code_cache.hpp
sycl::_V1::detail::RTDeviceBinaryImage::getProperty
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
Definition: device_binary_image.cpp:141
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::begin
ConstIterator begin() const
Definition: device_binary_image.hpp:114
sycl::_V1::detail::loadDeviceLibFallback
static RT::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const RT::PiDevice &Device, bool UseNativeLib)
Definition: program_manager.cpp:847
sycl::_V1::detail::applyLinkOptionsFromEnvironment
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
Definition: program_manager.cpp:435
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2387
all_of
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
sycl::_V1::detail::enableITTAnnotationsIfNeeded
static void enableITTAnnotationsIfNeeded(const RT::PiProgram &Prog, const plugin &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
Definition: program_manager.cpp:56
sycl::_V1::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:443
sycl::_V1::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
device_image_impl.hpp
sycl::_V1::detail::device_image_plain
Definition: kernel_bundle.hpp:76
sycl::_V1::device::has
bool has(aspect Aspect) const
Indicates if the SYCL device has the given feature.
Definition: device.cpp:202
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:307
sycl::_V1::detail::DeviceLibNames
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
Definition: program_manager.cpp:788
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1383
sycl::_V1::detail::ByteArray::end
ConstIterator end() const
Definition: device_binary_image.hpp:31
backend_types.hpp
sycl::_V1::detail::getOrBuild
KernelProgramCache::BuildResult< RetT > * getOrBuild(KernelProgramCache &KPCache, GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
Definition: program_manager.cpp:144
sycl::_V1::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:84
sycl::_V1::device::get_info
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.cpp:136
sycl::_V1::detail::ByteArray::consume
auto consume()
Definition: device_binary_image.hpp:33
sycl::_V1::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:66
sycl::_V1::detail::RTDeviceBinaryImage::getImageID
std::uintptr_t getImageID() const
Definition: device_binary_image.hpp:226
exception.hpp
sycl::_V1::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
sycl::_V1::detail::applyOptionsFromEnvironment
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
Definition: program_manager.cpp:444
sycl::_V1::context::get_platform
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:135
sycl::_V1::detail::program_impl::stableSerializeSpecConstRegistry
void stableSerializeSpecConstRegistry(SerializedObj &Dst) const
Definition: program_impl.hpp:330
sycl::_V1::detail::ByteArray::dropBytes
void dropBytes(std::size_t Bytes)
Definition: device_binary_image.hpp:40
sycl::_V1::detail::getUint32PropAsBool
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
Definition: program_manager.cpp:356
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_esimd_emulator.cpp:1303
sycl::_V1::detail::appendLinkOptionsFromImage
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:342
sycl::_V1::detail::program_impl
Definition: program_impl.hpp:38
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1348
sycl::_V1::detail::setSpecializationConstants
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, RT::PiProgram Prog, const plugin &Plugin)
Definition: program_manager.cpp:1920
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::isAvailable
bool isAvailable() const
Definition: device_binary_image.hpp:118
sycl::_V1::detail::createKernelArgMask
static ProgramManager::KernelArgMask createKernelArgMask(const ByteArray &Bytes)
Definition: program_manager.cpp:1144
any_of
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
util.hpp
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_esimd_emulator.cpp:2018
sycl::_V1::detail::RTDeviceBinaryImage::print
virtual void print() const
Definition: device_binary_image.cpp:100
sycl::_V1::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:68
sycl::_V1::detail::KernelProgramCache::getOrInsertProgram
std::pair< ProgramWithBuildStateT *, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
Definition: kernel_program_cache.hpp:114
sycl::_V1::detail::ProgramManager
Definition: program_manager.hpp:81
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:792
sycl::_V1::detail::KernelProgramCache::waitUntilBuilt
void waitUntilBuilt(BuildResult< T > &BR, Predicate Pred) const
Definition: kernel_program_cache.hpp:142
pi_device_binary_struct::EntriesEnd
_pi_offload_entry EntriesEnd
Definition: pi.h:879
sycl::_V1::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
PI_PROGRAM_INFO_DEVICES
@ PI_PROGRAM_INFO_DEVICES
Definition: pi.h:339
usm_settings::Device
@ Device
Definition: usm_allocator_config.hpp:18
sycl::_V1::exception::what
const char * what() const noexcept final
Definition: exception.cpp:74
sycl::_V1::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:54
spec_constant_impl.hpp
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_esimd_emulator.cpp:2013
sycl::_V1::detail::KernelProgramCache
Definition: kernel_program_cache.hpp:31
sycl::_V1::detail::LastKSId
constexpr KernelSetId LastKSId
Definition: common.hpp:447
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange
Definition: device_binary_image.hpp:86
sycl::_V1::detail::device_image_impl::SpecConstDescT::BlobOffset
unsigned int BlobOffset
Definition: device_image_impl.hpp:52
_pi_offload_entry_struct::name
char * name
Definition: pi.h:721
sycl::_V1::platform::get_info
detail::is_platform_info_desc< Param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.cpp:61
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:519
sycl::_V1::detail::ByteArray::size
std::size_t size() const
Definition: device_binary_image.hpp:29
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
sycl::_V1::detail::RTDeviceBinaryImage::get
pi_device_binary get() const
Definition: device_binary_image.hpp:233
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:791
sycl::_V1::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:82
sycl::_V1::detail::RTDeviceBinaryImage::getFormat
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
Definition: device_binary_image.hpp:178
sycl::_V1::detail::plugin::getPiPlugin
const RT::PiPlugin & getPiPlugin() const
Definition: plugin.hpp:106
PI_PROGRAM_BUILD_INFO_LOG
@ PI_PROGRAM_BUILD_INFO_LOG
Definition: pi.h:166
sycl::_V1::detail::get_device_info_string
std::string get_device_info_string(RT::PiDevice dev, RT::PiDeviceInfo InfoCode, const plugin &Plugin)
Definition: device_info.hpp:161
pi_int32
int32_t pi_int32
Definition: pi.h:128
sycl::_V1::detail::getBinImageState
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
Definition: program_manager.cpp:1504
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:300
spec_constant.hpp
sycl::_V1::detail::KernelProgramCache::tryToGetKernelFast
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
Definition: kernel_program_cache.hpp:170
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:41
pi_device_binary_struct::EntriesBegin
_pi_offload_entry EntriesBegin
the offload entry table
Definition: pi.h:878
sycl::_V1::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:479