DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/config.hpp>
10 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
13 #include <detail/event_impl.hpp>
16 #include <detail/platform_impl.hpp>
17 #include <detail/program_impl.hpp>
19 #include <detail/queue_impl.hpp>
21 #include <sycl/aspects.hpp>
22 #include <sycl/backend_types.hpp>
23 #include <sycl/context.hpp>
24 #include <sycl/detail/common.hpp>
26 #include <sycl/detail/os_util.hpp>
28 #include <sycl/detail/util.hpp>
29 #include <sycl/device.hpp>
30 #include <sycl/exception.hpp>
31 #include <sycl/stl.hpp>
32 
33 #include <algorithm>
34 #include <cassert>
35 #include <cstdint>
36 #include <cstdlib>
37 #include <cstring>
38 #include <fstream>
39 #include <memory>
40 #include <mutex>
41 #include <sstream>
42 #include <string>
43 #include <variant>
44 
45 namespace sycl {
46 inline namespace _V1 {
47 namespace detail {
48 
49 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
50 
51 static constexpr int DbgProgMgr = 0;
52 
53 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
54 
57 static void
59  const PluginPtr &Plugin) {
61  constexpr char SpecValue = 1;
63  Prog, ITTSpecConstId, sizeof(char), &SpecValue);
64  }
65 }
66 
69 }
70 
72 createBinaryProgram(const ContextImplPtr Context, const device &Device,
73  const unsigned char *Data, size_t DataLen,
74  const std::vector<pi_device_binary_property> Metadata) {
75  const PluginPtr &Plugin = Context->getPlugin();
76 #ifndef _NDEBUG
77  pi_uint32 NumDevices = 0;
78  Plugin->call<PiApiKind::piContextGetInfo>(Context->getHandleRef(),
80  sizeof(NumDevices), &NumDevices,
81  /*param_value_size_ret=*/nullptr);
82  assert(NumDevices > 0 &&
83  "Only a single device is supported for AOT compilation");
84 #endif
85 
88  getSyclObjImpl(Device)->getHandleRef();
89  pi_int32 BinaryStatus = CL_SUCCESS;
91  Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data,
92  Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
93 
94  if (BinaryStatus != CL_SUCCESS) {
95  throw runtime_error("Creating program with binary failed.", BinaryStatus);
96  }
97 
98  return Program;
99 }
100 
102 createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data,
103  size_t DataLen) {
104  sycl::detail::pi::PiProgram Program = nullptr;
105  const PluginPtr &Plugin = Context->getPlugin();
106  Plugin->call<PiApiKind::piProgramCreate>(Context->getHandleRef(), Data,
107  DataLen, &Program);
108  return Program;
109 }
110 
130 template <typename RetT, typename ExceptionT, typename GetCachedBuildFT,
131  typename BuildFT>
132 KernelProgramCache::BuildResult<RetT> *
133 getOrBuild(KernelProgramCache &KPCache, GetCachedBuildFT &&GetCachedBuild,
134  BuildFT &&Build) {
135  using BuildState = KernelProgramCache::BuildState;
136 
137  auto [BuildResult, InsertionTookPlace] = GetCachedBuild();
138 
139  // no insertion took place, thus some other thread has already inserted smth
140  // in the cache
141  if (!InsertionTookPlace) {
142  for (;;) {
143  RetT *Result = KPCache.waitUntilBuilt<ExceptionT>(BuildResult);
144 
145  if (Result)
146  return BuildResult;
147 
148  // Previous build is failed. There was no SYCL exception though.
149  // We might try to build once more.
150  BuildState Expected = BuildState::BS_Failed;
151  BuildState Desired = BuildState::BS_InProgress;
152 
153  if (BuildResult->State.compare_exchange_strong(Expected, Desired))
154  break; // this thread is the building thread now
155  }
156  }
157 
158  // only the building thread will run this
159  try {
160  BuildResult->Val = Build();
161  RetT *Desired = &BuildResult->Val;
162 
163 #ifndef NDEBUG
164  RetT *Expected = nullptr;
165 
166  if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
167  // We've got a funny story here
168  assert(false && "We've build an entity that is already have been built.");
169 #else
170  BuildResult->Ptr.store(Desired);
171 #endif
172 
173  {
174  // Even if shared variable is atomic, it must be modified under the mutex
175  // in order to correctly publish the modification to the waiting thread
176  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
177  BuildResult->State.store(BuildState::BS_Done);
178  }
179 
180  KPCache.notifyAllBuild(*BuildResult);
181 
182  return BuildResult;
183  } catch (const exception &Ex) {
184  BuildResult->Error.Msg = Ex.what();
185  BuildResult->Error.Code = Ex.get_cl_code();
186 
187  {
188  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
189  BuildResult->State.store(BuildState::BS_Failed);
190  }
191 
192  KPCache.notifyAllBuild(*BuildResult);
193 
194  std::rethrow_exception(std::current_exception());
195  } catch (...) {
196  {
197  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
198  BuildResult->State.store(BuildState::BS_Failed);
199  }
200 
201  KPCache.notifyAllBuild(*BuildResult);
202 
203  std::rethrow_exception(std::current_exception());
204  }
205 }
206 
207 // TODO replace this with a new PI API function
208 static bool
211  // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported.
212  if (Format != PI_DEVICE_BINARY_TYPE_SPIRV)
213  return true;
214 
215  const backend ContextBackend = detail::getSyclObjImpl(C)->getBackend();
216 
217  // The CUDA backend cannot use SPIR-V
218  if (ContextBackend == backend::ext_oneapi_cuda)
219  return false;
220 
221  std::vector<device> Devices = C.get_devices();
222 
223  // Program type is SPIR-V, so we need a device compiler to do JIT.
224  for (const device &D : Devices) {
225  if (!D.get_info<info::device::is_compiler_available>())
226  return false;
227  }
228 
229  // OpenCL 2.1 and greater require clCreateProgramWithIL
230  if (ContextBackend == backend::opencl) {
231  std::string ver = C.get_platform().get_info<info::platform::version>();
232  if (ver.find("OpenCL 1.0") == std::string::npos &&
233  ver.find("OpenCL 1.1") == std::string::npos &&
234  ver.find("OpenCL 1.2") == std::string::npos &&
235  ver.find("OpenCL 2.0") == std::string::npos)
236  return true;
237  }
238 
239  for (const device &D : Devices) {
240  // We need cl_khr_il_program extension to be present
241  // and we can call clCreateProgramWithILKHR using the extension
242  std::vector<std::string> Extensions =
243  D.get_info<info::device::extensions>();
244  if (Extensions.end() ==
245  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
246  return false;
247  }
248 
249  return true;
250 }
251 
253  switch (Format) {
255  return "none";
257  return "native";
259  return "SPIR-V";
261  return "LLVM IR";
262  }
263  assert(false && "Unknown device image format");
264  return "unknown";
265 }
266 
269  const context &Context, const device &Device) {
270  if (DbgProgMgr > 0)
271  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
272  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
273  << ")\n";
274  const pi_device_binary_struct &RawImg = Img.getRawData();
275 
276  // perform minimal sanity checks on the device image and the descriptor
277  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
278  throw runtime_error("Malformed device program image descriptor",
279  PI_ERROR_INVALID_VALUE);
280  }
281  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
282  throw runtime_error("Invalid device program image: size is zero",
283  PI_ERROR_INVALID_VALUE);
284  }
285  size_t ImgSize = Img.getSize();
286 
287  // TODO if the binary image is a part of the fat binary, the clang
288  // driver should have set proper format option to the
289  // clang-offload-wrapper. The fix depends on AOT compilation
290  // implementation, so will be implemented together with it.
291  // Img->Format can't be updated as it is inside of the in-memory
292  // OS module binary.
294 
295  if (Format == PI_DEVICE_BINARY_TYPE_NONE)
296  Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
297  // sycl::detail::pi::PiDeviceBinaryType Format = Img->Format;
298  // assert(Format != PI_DEVICE_BINARY_TYPE_NONE && "Image format not set");
299 
300  if (!isDeviceBinaryTypeSupported(Context, Format))
301  throw feature_not_supported(
302  "SPIR-V online compilation is not supported in this context",
303  PI_ERROR_INVALID_OPERATION);
304 
305  // Get program metadata from properties
306  auto ProgMetadata = Img.getProgramMetadata();
307  std::vector<pi_device_binary_property> ProgMetadataVector{
308  ProgMetadata.begin(), ProgMetadata.end()};
309 
310  // Load the image
311  const ContextImplPtr Ctx = getSyclObjImpl(Context);
314  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
315  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize,
316  ProgMetadataVector);
317 
318  {
319  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
320  // associate the PI program with the image it was created for
321  NativePrograms[Res] = &Img;
322  }
323 
324  Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
325 
326  if (DbgProgMgr > 1)
327  std::cerr << "created program: " << Res
328  << "; image format: " << getFormatStr(Format) << "\n";
329 
330  return Res;
331 }
332 
333 static void appendLinkOptionsFromImage(std::string &LinkOpts,
334  const RTDeviceBinaryImage &Img) {
335  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
336  // Update only if link options are not overwritten by environment variable
337  if (!LinkOptsEnv) {
338  const char *TemporaryStr = Img.getLinkOptions();
339  if (TemporaryStr != nullptr) {
340  if (!LinkOpts.empty())
341  LinkOpts += " ";
342  LinkOpts += std::string(TemporaryStr);
343  }
344  }
345 }
346 
348  const char *PropName) {
349  pi_device_binary_property Prop = Img.getProperty(PropName);
350  return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
351 }
352 
353 static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img,
354  const char *PropName) {
355  pi_device_binary_property Prop = Img.getProperty(PropName);
356  std::stringstream ss;
357  if (!Prop)
358  return "";
359  int optLevel = DeviceBinaryProperty(Prop).asUint32();
360  if (optLevel < 0 || optLevel > 3)
361  return "";
362  ss << "-O" << optLevel;
363  std::string temp = ss.str();
364  return temp;
365 }
366 
367 static void
369  const RTDeviceBinaryImage &Img,
370  bool IsEsimdImage) {
371  // TODO: sycl-register-alloc-mode is deprecated and should be removed in the
372  // next ABI break.
373  pi_device_binary_property RegAllocModeProp =
374  Img.getProperty("sycl-register-alloc-mode");
375  pi_device_binary_property GRFSizeProp = Img.getProperty("sycl-grf-size");
376 
377  if (!RegAllocModeProp && !GRFSizeProp)
378  return;
379  // The mutual exclusivity of these properties should have been checked in
380  // sycl-post-link.
381  assert(!RegAllocModeProp || !GRFSizeProp);
382  bool IsLargeGRF = false;
383  bool IsAutoGRF = false;
384  if (RegAllocModeProp) {
385  uint32_t RegAllocModePropVal =
386  DeviceBinaryProperty(RegAllocModeProp).asUint32();
387  IsLargeGRF = RegAllocModePropVal ==
388  static_cast<uint32_t>(register_alloc_mode_enum::large);
389  IsAutoGRF = RegAllocModePropVal ==
390  static_cast<uint32_t>(register_alloc_mode_enum::automatic);
391  } else {
392  assert(GRFSizeProp);
393  uint32_t GRFSizePropVal = DeviceBinaryProperty(GRFSizeProp).asUint32();
394  IsLargeGRF = GRFSizePropVal == 256;
395  IsAutoGRF = GRFSizePropVal == 0;
396  }
397  if (IsLargeGRF) {
398  if (!CompileOpts.empty())
399  CompileOpts += " ";
400  // This option works for both LO AND OCL backends.
401  CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file";
402  }
403  // TODO: Support Auto GRF for ESIMD once vc supports it.
404  if (IsAutoGRF && !IsEsimdImage) {
405  if (!CompileOpts.empty())
406  CompileOpts += " ";
407  // This option works for both LO AND OCL backends.
408  CompileOpts += "-ze-intel-enable-auto-large-GRF-mode";
409  }
410 }
411 
412 static void appendCompileOptionsFromImage(std::string &CompileOpts,
413  const RTDeviceBinaryImage &Img,
414  const std::vector<device> &Devs,
415  const PluginPtr &) {
416  // Build options are overridden if environment variables are present.
417  // Environment variables are not changed during program lifecycle so it
418  // is reasonable to use static here to read them only once.
419  static const char *CompileOptsEnv =
421  // Update only if compile options are not overwritten by environment
422  // variable
423  if (!CompileOptsEnv) {
424  if (!CompileOpts.empty())
425  CompileOpts += " ";
426  const char *TemporaryStr = Img.getCompileOptions();
427  if (TemporaryStr != nullptr)
428  CompileOpts += std::string(TemporaryStr);
429  }
430  bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
431  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
432  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
433  if (isEsimdImage) {
434  if (!CompileOpts.empty())
435  CompileOpts += " ";
436  CompileOpts += "-vc-codegen";
437  // Allow warning and performance hints from vc/finalizer if the RT warning
438  // level is at least 1.
440  CompileOpts += " -disable-finalizer-msg";
441  }
442 
443  appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage);
444 
445  const auto &PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform());
446 
447  // Add optimization flags.
448  auto str = getUint32PropAsOptStr(Img, "optLevel");
449  const char *optLevelStr = str.c_str();
450  // TODO: Passing these options to vector compiler causes build failure in
451  // backend. Will pass the flags once backend compilation issue is resolved.
452  // Update only if compile options are not overwritten by environment
453  // variable.
454  if (!isEsimdImage && !CompileOptsEnv && optLevelStr != nullptr &&
455  optLevelStr[0] != '\0') {
456  // Making sure all devices have the same platform.
457  assert(!Devs.empty() &&
458  std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
459  return Dev.get_platform() == Devs[0].get_platform();
460  }));
461  const char *backend_option = nullptr;
462  // Empty string is returned in backend_option when no appropriate backend
463  // option is available for a given frontend option.
464  PlatformImpl->getBackendOption(optLevelStr, &backend_option);
465  if (backend_option && backend_option[0] != '\0') {
466  if (!CompileOpts.empty())
467  CompileOpts += " ";
468  CompileOpts += std::string(backend_option);
469  }
470  }
471  bool IsIntelGPU =
472  (PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
473  PlatformImpl->getBackend() == backend::opencl) &&
474  std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) {
475  return Dev.is_gpu() &&
476  Dev.get_info<info::device::vendor_id>() == 0x8086;
477  });
478  if (IsIntelGPU && Img.getDeviceGlobals().size() != 0) {
479  // If the image has device globals we need to add the
480  // -ze-take-global-address option to tell IGC to record addresses of these.
481  if (!CompileOpts.empty())
482  CompileOpts += " ";
483  CompileOpts += "-ze-take-global-address";
484  }
485  if (!CompileOptsEnv) {
486  static const char *TargetCompileFast = "-ftarget-compile-fast";
487  if (auto Pos = CompileOpts.find(TargetCompileFast);
488  Pos != std::string::npos) {
489  const char *BackendOption = nullptr;
490  if (IsIntelGPU)
491  PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
492  auto OptLen = strlen(TargetCompileFast);
493  if (IsIntelGPU && BackendOption && BackendOption[0] != '\0')
494  CompileOpts.replace(Pos, OptLen, BackendOption);
495  else
496  CompileOpts.erase(Pos, OptLen);
497  }
498  }
499 }
500 
501 static void applyOptionsFromImage(std::string &CompileOpts,
502  std::string &LinkOpts,
503  const RTDeviceBinaryImage &Img,
504  const std::vector<device> &Devices,
505  const PluginPtr &Plugin) {
506  appendCompileOptionsFromImage(CompileOpts, Img, Devices, Plugin);
507  appendLinkOptionsFromImage(LinkOpts, Img);
508 }
509 
510 static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
511  // Environment variables are not changed during program lifecycle so it
512  // is reasonable to use static here to read them only once.
513  static const char *CompileOptsEnv =
515  if (CompileOptsEnv) {
516  CompileOpts = CompileOptsEnv;
517  }
518 }
519 
520 static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
521  // Environment variables are not changed during program lifecycle so it
522  // is reasonable to use static here to read them only once.
523  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
524  if (LinkOptsEnv) {
525  LinkOpts = LinkOptsEnv;
526  }
527 }
528 
529 static void applyOptionsFromEnvironment(std::string &CompileOpts,
530  std::string &LinkOpts) {
531  // Build options are overridden if environment variables are present.
534 }
535 
536 std::pair<sycl::detail::pi::PiProgram, bool>
538  const context &Context,
539  const device &Device,
540  const std::string &CompileAndLinkOptions,
541  SerializedObj SpecConsts) {
542  sycl::detail::pi::PiProgram NativePrg;
543 
545  Device, Img, SpecConsts, CompileAndLinkOptions);
546  if (BinProg.size()) {
547  // Get program metadata from properties
548  auto ProgMetadata = Img.getProgramMetadata();
549  std::vector<pi_device_binary_property> ProgMetadataVector{
550  ProgMetadata.begin(), ProgMetadata.end()};
551 
552  // TODO: Build for multiple devices once supported by program manager
553  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
554  (const unsigned char *)BinProg[0].data(),
555  BinProg[0].size(), ProgMetadataVector);
556  } else {
557  NativePrg = createPIProgram(Img, Context, Device);
558  }
559  return {NativePrg, BinProg.size()};
560 }
561 
564 static void emitBuiltProgramInfo(const pi_program &Prog,
565  const ContextImplPtr &Context) {
567  std::string ProgramBuildLog =
568  ProgramManager::getProgramBuildLog(Prog, Context);
569  std::clog << ProgramBuildLog << std::endl;
570  }
571 }
572 
574  const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl,
575  const std::string &KernelName, const program_impl *Prg,
576  bool JITCompilationIsRequired) {
577  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
578 
579  std::string CompileOpts;
580  std::string LinkOpts;
581  if (Prg) {
582  CompileOpts = Prg->get_build_options();
583  }
584 
585  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
586 
587  SerializedObj SpecConsts;
588  if (Prg)
589  Prg->stableSerializeSpecConstRegistry(SpecConsts);
590 
591  // Check if we can optimize program builds for sub-devices by using a program
592  // built for the root device
593  DeviceImplPtr RootDevImpl = DeviceImpl;
594  while (!RootDevImpl->isRootDevice()) {
595  auto ParentDev = detail::getSyclObjImpl(
596  RootDevImpl->get_info<info::device::parent_device>());
597  // Sharing is allowed within a single context only
598  if (!ContextImpl->hasDevice(ParentDev))
599  break;
600  RootDevImpl = ParentDev;
601  }
602 
603  pi_bool MustBuildOnSubdevice = PI_TRUE;
604  ContextImpl->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
605  RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
606  sizeof(pi_bool), &MustBuildOnSubdevice, nullptr);
607 
608  DeviceImplPtr Dev =
609  (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl;
610  auto Context = createSyclObjFromImpl<context>(ContextImpl);
611  auto Device = createSyclObjFromImpl<device>(Dev);
612  const RTDeviceBinaryImage &Img =
613  getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
614 
615  // Check that device supports all aspects used by the kernel
616  if (auto exception = checkDevSupportDeviceRequirements(Device, Img))
617  throw *exception;
618 
619  auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
620  &LinkOpts, SpecConsts] {
621  const PluginPtr &Plugin = ContextImpl->getPlugin();
622  applyOptionsFromImage(CompileOpts, LinkOpts, Img, {Device}, Plugin);
623 
624  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
625  Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
626 
627  if (!DeviceCodeWasInCache) {
628  if (Prg)
629  flushSpecConstants(*Prg, NativePrg, &Img);
630  if (Img.supportsSpecConstants())
631  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
632  }
633 
634  ProgramPtr ProgramManaged(
635  NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
636 
637  // Link a fallback implementation of device libraries if they are not
638  // supported by a device compiler.
639  // Pre-compiled programs (after AOT compilation or read from persitent
640  // cache) are supposed to be already linked.
641  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
642  // no fallback device library will be linked.
643  uint32_t DeviceLibReqMask = 0;
644  if (!DeviceCodeWasInCache &&
647  DeviceLibReqMask = getDeviceLibReqMask(Img);
648 
649  ProgramPtr BuiltProgram =
650  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
651  getRawSyclObjImpl(Device)->getHandleRef(), DeviceLibReqMask);
652 
653  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
654 
655  {
656  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
657  NativePrograms[BuiltProgram.get()] = &Img;
658  }
659 
660  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
661 
662  // Save program to persistent cache if it is not there
663  if (!DeviceCodeWasInCache)
665  Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
666  return BuiltProgram.release();
667  };
668 
669  uint32_t ImgId = Img.getImageID();
670  const sycl::detail::pi::PiDevice PiDevice = Dev->getHandleRef();
671  auto CacheKey =
672  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
673  std::make_pair(PiDevice, CompileOpts + LinkOpts));
674 
675  auto GetCachedBuildF = [&Cache, &CacheKey]() {
676  return Cache.getOrInsertProgram(CacheKey);
677  };
678 
679  auto BuildResult =
680  getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
681  Cache, GetCachedBuildF, BuildF);
682  // getOrBuild is not supposed to return nullptr
683  assert(BuildResult != nullptr && "Invalid build result");
684  return *BuildResult->Ptr.load();
685 }
686 
687 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *,
690  const DeviceImplPtr &DeviceImpl,
691  const std::string &KernelName,
692  const program_impl *Prg) {
693  if (DbgProgMgr > 0) {
694  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
695  << ", " << DeviceImpl.get() << ", " << KernelName << ")\n";
696  }
697 
698  using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT;
699 
700  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
701 
702  std::string CompileOpts, LinkOpts;
703  SerializedObj SpecConsts;
704  if (Prg) {
705  CompileOpts = Prg->get_build_options();
706  Prg->stableSerializeSpecConstRegistry(SpecConsts);
707  }
708  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
709  const sycl::detail::pi::PiDevice PiDevice = DeviceImpl->getHandleRef();
710 
711  auto key = std::make_tuple(std::move(SpecConsts), PiDevice,
712  CompileOpts + LinkOpts, KernelName);
713  auto ret_tuple = Cache.tryToGetKernelFast(key);
714  if (std::get<0>(ret_tuple))
715  return ret_tuple;
716 
718  getBuiltPIProgram(ContextImpl, DeviceImpl, KernelName, Prg);
719 
720  auto BuildF = [this, &Program, &KernelName, &ContextImpl] {
721  sycl::detail::pi::PiKernel Kernel = nullptr;
722 
723  const PluginPtr &Plugin = ContextImpl->getPlugin();
725  Program, KernelName.c_str(), &Kernel);
726 
727  // Some PI Plugins (like OpenCL) require this call to enable USM
728  // For others, PI will turn this into a NOP.
730  sizeof(pi_bool), &PI_TRUE);
731 
732  const KernelArgMask *ArgMask = nullptr;
733  if (!m_UseSpvFile)
734  ArgMask = getEliminatedKernelArgMask(Program, KernelName);
735  return std::make_pair(Kernel, ArgMask);
736  };
737 
738  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
739  return Cache.getOrInsertKernel(Program, KernelName);
740  };
741 
742  auto BuildResult = getOrBuild<KernelArgMaskPairT, invalid_object_error>(
743  Cache, GetCachedBuildF, BuildF);
744  // getOrBuild is not supposed to return nullptr
745  assert(BuildResult != nullptr && "Invalid build result");
746  const KernelArgMaskPairT &KernelArgMaskPair = *BuildResult->Ptr.load();
747  auto ret_val = std::make_tuple(KernelArgMaskPair.first,
748  &(BuildResult->MBuildResultMutex),
749  KernelArgMaskPair.second, Program);
750  Cache.saveKernel(key, ret_val);
751  return ret_val;
752 }
753 
756  const ContextImplPtr Context) {
758  const PluginPtr &Plugin = Context->getPlugin();
761  &Program, nullptr);
762  return Program;
763 }
764 
765 std::string
767  const ContextImplPtr Context) {
768  size_t PIDevicesSize = 0;
769  const PluginPtr &Plugin = Context->getPlugin();
770  Plugin->call<PiApiKind::piProgramGetInfo>(Program, PI_PROGRAM_INFO_DEVICES, 0,
771  nullptr, &PIDevicesSize);
772  std::vector<sycl::detail::pi::PiDevice> PIDevices(
773  PIDevicesSize / sizeof(sycl::detail::pi::PiDevice));
775  PIDevicesSize, PIDevices.data(),
776  nullptr);
777  std::string Log = "The program was built for " +
778  std::to_string(PIDevices.size()) + " devices";
779  for (sycl::detail::pi::PiDevice &Device : PIDevices) {
780  std::string DeviceBuildInfoString;
781  size_t DeviceBuildInfoStrSize = 0;
782  Plugin->call<PiApiKind::piProgramGetBuildInfo>(
783  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, 0, nullptr,
784  &DeviceBuildInfoStrSize);
785  if (DeviceBuildInfoStrSize > 0) {
786  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
787  Plugin->call<PiApiKind::piProgramGetBuildInfo>(
788  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
789  DeviceBuildInfo.data(), nullptr);
790  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
791  }
792 
793  std::string DeviceNameString;
794  size_t DeviceNameStrSize = 0;
795  Plugin->call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_NAME, 0,
796  nullptr, &DeviceNameStrSize);
797  if (DeviceNameStrSize > 0) {
798  std::vector<char> DeviceName(DeviceNameStrSize);
799  Plugin->call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_NAME,
800  DeviceNameStrSize,
801  DeviceName.data(), nullptr);
802  DeviceNameString = std::string(DeviceName.data());
803  }
804  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
805  DeviceBuildInfoString;
806  }
807  return Log;
808 }
809 
810 // TODO device libraries may use scpecialization constants, manifest files, etc.
811 // To support that they need to be delivered in a different container - so that
812 // pi_device_binary_struct can be created for each of them.
813 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
815  std::string LibSyclDir = OSUtil::getCurrentDSODir();
816  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
817  std::ifstream::in | std::ifstream::binary);
818  if (!File.good()) {
819  return false;
820  }
821 
822  File.seekg(0, std::ios::end);
823  size_t FileSize = File.tellg();
824  File.seekg(0, std::ios::beg);
825  std::vector<char> FileContent(FileSize);
826  File.read(&FileContent[0], FileSize);
827  File.close();
828 
829  Prog =
830  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
831  return Prog != nullptr;
832 }
833 
834 // For each extension, a pair of library names. The first uses native support,
835 // the second emulates functionality in software.
836 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
839  {nullptr, "libsycl-fallback-cassert.spv"}},
841  {nullptr, "libsycl-fallback-cmath.spv"}},
843  {nullptr, "libsycl-fallback-cmath-fp64.spv"}},
845  {nullptr, "libsycl-fallback-complex.spv"}},
847  {nullptr, "libsycl-fallback-complex-fp64.spv"}},
849  {nullptr, "libsycl-fallback-cstring.spv"}},
851  {nullptr, "libsycl-fallback-imf.spv"}},
853  {nullptr, "libsycl-fallback-imf-fp64.spv"}},
855  {nullptr, "libsycl-fallback-imf-bf16.spv"}},
857  {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}};
858 
859 static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) {
860  auto LibPair = DeviceLibNames.find(Extension);
861  const char *Lib = nullptr;
862  if (LibPair != DeviceLibNames.end())
863  Lib = Native ? LibPair->second.first : LibPair->second.second;
864  if (Lib == nullptr)
865  throw compile_program_error("Unhandled (new?) device library extension",
866  PI_ERROR_INVALID_OPERATION);
867  return Lib;
868 }
869 
870 // For each extension understood by the SYCL runtime, the string representation
871 // of its name. Names with devicelib in them are internal to the runtime. Others
872 // are actual OpenCL extensions.
873 static const std::map<DeviceLibExt, const char *> DeviceLibExtensionStrs = {
874  {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"},
875  {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"},
877  "cl_intel_devicelib_math_fp64"},
878  {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"},
880  "cl_intel_devicelib_complex_fp64"},
881  {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"},
882  {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"},
883  {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"},
884  {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"},
886  "cl_intel_bfloat16_conversions"}};
887 
888 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
889  auto Ext = DeviceLibExtensionStrs.find(Extension);
890  if (Ext == DeviceLibExtensionStrs.end())
891  throw compile_program_error("Unhandled (new?) device library extension",
892  PI_ERROR_INVALID_OPERATION);
893  return Ext->second;
894 }
895 
898  const sycl::detail::pi::PiDevice &Device,
899  bool UseNativeLib) {
900 
901  auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib);
902 
903  auto LockedCache = Context->acquireCachedLibPrograms();
904  auto CachedLibPrograms = LockedCache.get();
905  auto CacheResult = CachedLibPrograms.emplace(
906  std::make_pair(std::make_pair(Extension, Device), nullptr));
907  bool Cached = !CacheResult.second;
908  auto LibProgIt = CacheResult.first;
909  sycl::detail::pi::PiProgram &LibProg = LibProgIt->second;
910 
911  if (Cached)
912  return LibProg;
913 
914  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
915  CachedLibPrograms.erase(LibProgIt);
916  throw compile_program_error(std::string("Failed to load ") + LibFileName,
917  PI_ERROR_INVALID_VALUE);
918  }
919 
920  const PluginPtr &Plugin = Context->getPlugin();
921  // TODO no spec constants are used in the std libraries, support in the future
923  Plugin->call_nocheck<PiApiKind::piProgramCompile>(
924  LibProg,
925  /*num devices = */ 1, &Device,
926  // Do not use compile options for library programs: it is not clear
927  // if user options (image options) are supposed to be applied to
928  // library program as well, and what actually happens to a SPIR-V
929  // program if we apply them.
930  "", 0, nullptr, nullptr, nullptr, nullptr);
931  if (Error != PI_SUCCESS) {
932  CachedLibPrograms.erase(LibProgIt);
933  throw compile_program_error(
934  ProgramManager::getProgramBuildLog(LibProg, Context), Error);
935  }
936 
937  return LibProg;
938 }
939 
941  const char *SpvFile = std::getenv(UseSpvEnv);
942  // If a SPIR-V file is specified with an environment variable,
943  // register the corresponding image
944  if (SpvFile) {
945  m_UseSpvFile = true;
946  // The env var requests that the program is loaded from a SPIR-V file on
947  // disk
948  std::ifstream File(SpvFile, std::ios::binary);
949 
950  if (!File.is_open())
951  throw runtime_error(std::string("Can't open file specified via ") +
952  UseSpvEnv + ": " + SpvFile,
953  PI_ERROR_INVALID_VALUE);
954  File.seekg(0, std::ios::end);
955  size_t Size = File.tellg();
956  std::unique_ptr<char[]> Data(new char[Size]);
957  File.seekg(0);
958  File.read(Data.get(), Size);
959  File.close();
960  if (!File.good())
961  throw runtime_error(std::string("read from ") + SpvFile +
962  std::string(" failed"),
963  PI_ERROR_INVALID_VALUE);
964  // No need for a mutex here since all access to these private fields is
965  // blocked until the construction of the ProgramManager singleton is
966  // finished.
967  m_SpvFileImage =
968  make_unique_ptr<DynRTDeviceBinaryImage>(std::move(Data), Size);
969 
970  if (DbgProgMgr > 0) {
971  std::cerr << "loaded device image binary from " << SpvFile << "\n";
972  std::cerr << "format: " << getFormatStr(m_SpvFileImage->getFormat())
973  << "\n";
974  }
975  }
976 }
977 
979  bool JITCompilationIsRequired) {
980  if (!JITCompilationIsRequired)
981  return;
982  // If the image is already compiled with AOT, throw an exception.
983  const pi_device_binary_struct &RawImg = Image->getRawData();
984  if ((strcmp(RawImg.DeviceTargetSpec,
986  (strcmp(RawImg.DeviceTargetSpec,
988  (strcmp(RawImg.DeviceTargetSpec,
990  throw feature_not_supported("Recompiling AOT image is not supported",
991  PI_ERROR_INVALID_OPERATION);
992  }
993 }
994 
995 template <typename StorageKey>
997  const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
998  const StorageKey &Key, const context &Context, const device &Device) {
999  auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
1000  if (ItBegin == ItEnd)
1001  return nullptr;
1002 
1003  std::vector<pi_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
1004  auto It = ItBegin;
1005  for (unsigned I = 0; It != ItEnd; ++It, ++I)
1006  RawImgs[I] = const_cast<pi_device_binary>(&It->second->getRawData());
1007 
1008  pi_uint32 ImgInd = 0;
1009  // Ask the native runtime under the given context to choose the device image
1010  // it prefers.
1011  getSyclObjImpl(Context)
1012  ->getPlugin()
1014  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
1015  (pi_uint32)RawImgs.size(), &ImgInd);
1016  std::advance(ItBegin, ImgInd);
1017  return ItBegin->second;
1018 }
1019 
1020 RTDeviceBinaryImage &
1021 ProgramManager::getDeviceImage(const std::string &KernelName,
1022  const context &Context, const device &Device,
1023  bool JITCompilationIsRequired) {
1024  if (DbgProgMgr > 0) {
1025  std::cerr << ">>> ProgramManager::getDeviceImage(\"" << KernelName << "\", "
1026  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
1027  << ", " << JITCompilationIsRequired << ")\n";
1028 
1029  std::cerr << "available device images:\n";
1031  }
1032 
1033  if (m_UseSpvFile) {
1034  assert(m_SpvFileImage);
1035  return getDeviceImage(
1036  std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1037  Context, Device, JITCompilationIsRequired);
1038  }
1039 
1040  RTDeviceBinaryImage *Img = nullptr;
1041  {
1042  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1043  if (auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1044  KernelId != m_KernelName2KernelIDs.end()) {
1045  // Kernel ID presence guarantees that we have bin image in the storage.
1046  Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, KernelId->second,
1047  Context, Device);
1048  assert(Img && "No binary image found for kernel id");
1049  } else {
1050  Img = getBinImageFromMultiMap(m_ServiceKernels, KernelName, Context,
1051  Device);
1052  }
1053  }
1054  if (Img) {
1055  CheckJITCompilationForImage(Img, JITCompilationIsRequired);
1056 
1057  if (DbgProgMgr > 0) {
1058  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
1059  Img->print();
1060  }
1061  return *Img;
1062  }
1063 
1064  throw runtime_error("No kernel named " + KernelName + " was found",
1065  PI_ERROR_INVALID_KERNEL_NAME);
1066 }
1067 
1069  const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1070  const context &Context, const device &Device,
1071  bool JITCompilationIsRequired) {
1072  assert(ImageSet.size() > 0);
1073 
1074  if (DbgProgMgr > 0) {
1075  std::cerr << ">>> ProgramManager::getDeviceImage(Custom SPV file "
1076  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
1077  << ", " << JITCompilationIsRequired << ")\n";
1078 
1079  std::cerr << "available device images:\n";
1081  }
1082 
1083  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1084  std::vector<pi_device_binary> RawImgs(ImageSet.size());
1085  auto ImageIterator = ImageSet.begin();
1086  for (size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1087  RawImgs[i] = const_cast<pi_device_binary>(&(*ImageIterator)->getRawData());
1088  pi_uint32 ImgInd = 0;
1089  // Ask the native runtime under the given context to choose the device image
1090  // it prefers.
1091  getSyclObjImpl(Context)
1092  ->getPlugin()
1094  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
1095  (pi_uint32)RawImgs.size(), &ImgInd);
1096 
1097  ImageIterator = ImageSet.begin();
1098  std::advance(ImageIterator, ImgInd);
1099 
1100  CheckJITCompilationForImage(*ImageIterator, JITCompilationIsRequired);
1101 
1102  if (DbgProgMgr > 0) {
1103  std::cerr << "selected device image: " << &(*ImageIterator)->getRawData()
1104  << "\n";
1105  (*ImageIterator)->print();
1106  }
1107  return **ImageIterator;
1108 }
1109 
1110 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
1111  uint32_t Mask =
1112  0x1 << (static_cast<uint32_t>(Ext) -
1113  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
1114  return ((DeviceLibReqMask & Mask) == Mask);
1115 }
1116 
1117 static std::vector<sycl::detail::pi::PiProgram>
1119  const sycl::detail::pi::PiDevice &Device,
1120  uint32_t DeviceLibReqMask) {
1121  std::vector<sycl::detail::pi::PiProgram> Programs;
1122 
1123  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1125  /* is fallback loaded? */ false},
1135 
1136  // Disable all devicelib extensions requiring fp64 support if at least
1137  // one underlying device doesn't support cl_khr_fp64.
1138  std::string DevExtList =
1139  Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1141  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
1142 
1143  // Load a fallback library for an extension if the device does not
1144  // support it.
1145  for (auto &Pair : RequiredDeviceLibExt) {
1146  DeviceLibExt Ext = Pair.first;
1147  bool &FallbackIsLoaded = Pair.second;
1148 
1149  if (FallbackIsLoaded) {
1150  continue;
1151  }
1152 
1153  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
1154  continue;
1155  }
1156 
1160  !fp64Support) {
1161  continue;
1162  }
1163 
1164  auto ExtName = getDeviceLibExtensionStr(Ext);
1165 
1166  bool InhibitNativeImpl = false;
1167  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1168  InhibitNativeImpl = strstr(Env, ExtName) != nullptr;
1169  }
1170 
1171  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1172  if (!DeviceSupports || InhibitNativeImpl) {
1173  Programs.push_back(
1174  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/false));
1175  FallbackIsLoaded = true;
1176  } else {
1177  // bfloat16 needs native library if device supports it
1179  Programs.push_back(
1180  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/true));
1181  FallbackIsLoaded = true;
1182  }
1183  }
1184  }
1185  return Programs;
1186 }
1187 
1188 ProgramManager::ProgramPtr ProgramManager::build(
1189  ProgramPtr Program, const ContextImplPtr Context,
1190  const std::string &CompileOptions, const std::string &LinkOptions,
1191  const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask) {
1192 
1193  if (DbgProgMgr > 0) {
1194  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
1195  << CompileOptions << ", " << LinkOptions << ", ... " << Device
1196  << ")\n";
1197  }
1198 
1199  // TODO: old sycl compiler always marks cassert fallback device library as
1200  // "required", this will lead to compatibilty issue when we enable online
1201  // link in SYCL runtime. If users compile their code with old compiler and run
1202  // their executable with latest SYCL runtime, cassert fallback spv file will
1203  // always be loaded which is not expected, cassert device library development
1204  // is still in progress, the unexpected loading may lead to runtime problem.
1205  // So, we clear bit 0 in device library require mask to avoid loading cassert
1206  // fallback device library and will revert this when cassert development is
1207  // done.
1208  DeviceLibReqMask &= 0xFFFFFFFE;
1209  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1210 
1211  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
1212  // We do not link with other device libraries, because it may fail
1213  // due to unrecognized SPIR-V format of those libraries.
1214  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
1215  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
1216  LinkDeviceLibs = false;
1217 
1218  std::vector<sycl::detail::pi::PiProgram> LinkPrograms;
1219  if (LinkDeviceLibs) {
1220  LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask);
1221  }
1222 
1223  static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK");
1224  static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1');
1225 
1226  const PluginPtr &Plugin = Context->getPlugin();
1227  if (LinkPrograms.empty() && !ForceLink) {
1228  const std::string &Options = LinkOptions.empty()
1229  ? CompileOptions
1230  : (CompileOptions + " " + LinkOptions);
1232  Plugin->call_nocheck<PiApiKind::piProgramBuild>(
1233  Program.get(), /*num devices =*/1, &Device, Options.c_str(),
1234  nullptr, nullptr);
1235  if (Error != PI_SUCCESS)
1236  throw compile_program_error(getProgramBuildLog(Program.get(), Context),
1237  Error);
1238  return Program;
1239  }
1240 
1241  // Include the main program and compile/link everything together
1242  Plugin->call<PiApiKind::piProgramCompile>(Program.get(), /*num devices =*/1,
1243  &Device, CompileOptions.c_str(), 0,
1244  nullptr, nullptr, nullptr, nullptr);
1245  LinkPrograms.push_back(Program.get());
1246 
1247  sycl::detail::pi::PiProgram LinkedProg = nullptr;
1249  Plugin->call_nocheck<PiApiKind::piProgramLink>(
1250  Context->getHandleRef(), /*num devices =*/1, &Device,
1251  LinkOptions.c_str(), LinkPrograms.size(), LinkPrograms.data(),
1252  nullptr, nullptr, &LinkedProg);
1253 
1254  // Link program call returns a new program object if all parameters are valid,
1255  // or NULL otherwise. Release the original (user) program.
1256  Program.reset(LinkedProg);
1257  if (Error != PI_SUCCESS) {
1258  if (LinkedProg) {
1259  // A non-trivial error occurred during linkage: get a build log, release
1260  // an incomplete (but valid) LinkedProg, and throw.
1261  throw compile_program_error(getProgramBuildLog(LinkedProg, Context),
1262  Error);
1263  }
1264  Plugin->checkPiResult(Error);
1265  }
1266  return Program;
1267 }
1268 
1269 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1270  const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1271  Img.getAssertUsed();
1272  if (AssertUsedRange.isAvailable())
1273  for (const auto &Prop : AssertUsedRange)
1274  m_KernelUsesAssert.insert(Prop->Name);
1275 }
1276 
1277 bool ProgramManager::kernelUsesAssert(const std::string &KernelName) const {
1278  return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1279 }
1280 
1282  const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1283  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1284  pi_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1285  const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
1286  const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
1287  // Treat the image as empty one
1288  if (EntriesB == EntriesE)
1289  continue;
1290 
1291  auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg);
1292  static uint32_t SequenceID = 0;
1293 
1294  // Fill the kernel argument mask map
1295  const RTDeviceBinaryImage::PropertyRange &KPOIRange =
1296  Img->getKernelParamOptInfo();
1297  if (KPOIRange.isAvailable()) {
1298  KernelNameToArgMaskMap &ArgMaskMap =
1299  m_EliminatedKernelArgMasks[Img.get()];
1300  for (const auto &Info : KPOIRange)
1301  ArgMaskMap[Info->Name] =
1302  createKernelArgMask(DeviceBinaryProperty(Info).asByteArray());
1303  }
1304 
1305  // Fill maps for kernel bundles
1306  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1307 
1308  // Register all exported symbols
1309  auto ExportedSymbols = Img->getExportedSymbols();
1310  for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
1311  m_ExportedSymbols.insert(ExportedSymbol->Name);
1312 
1313  if (DumpImages) {
1314  const bool NeedsSequenceID = std::any_of(
1315  m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1316  [&](auto &CurrentImg) {
1317  return CurrentImg.first->getFormat() == Img->getFormat();
1318  });
1319  dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1320  }
1321 
1322  m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
1323 
1324  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1325  ++EntriesIt) {
1326 
1327  // Skip creating unique kernel ID if it is a service kernel.
1328  // SYCL service kernels are identified by having
1329  // __sycl_service_kernel__ in the mangled name, primarily as part of
1330  // the namespace of the name type.
1331  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1332  m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1333  continue;
1334  }
1335 
1336  // Skip creating unique kernel ID if it is an exported device
1337  // function. Exported device functions appear in the offload entries
1338  // among kernels, but are identifiable by being listed in properties.
1339  if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1340  continue;
1341 
1342  // ... and create a unique kernel ID for the entry
1343  auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1344  if (It == m_KernelName2KernelIDs.end()) {
1345  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1346  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1347  sycl::kernel_id KernelID =
1348  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1349 
1350  It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1351  }
1352  m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1353  m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1354  }
1355 
1356  cacheKernelUsesAssertInfo(*Img);
1357 
1358  // Sort kernel ids for faster search
1359  std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1360  m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
1361 
1362  // ... and initialize associated device_global information
1363  {
1364  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1365 
1366  auto DeviceGlobals = Img->getDeviceGlobals();
1367  for (const pi_device_binary_property &DeviceGlobal : DeviceGlobals) {
1368  ByteArray DeviceGlobalInfo =
1369  DeviceBinaryProperty(DeviceGlobal).asByteArray();
1370 
1371  // The supplied device_global info property is expected to contain:
1372  // * 8 bytes - Size of the property.
1373  // * 4 bytes - Size of the underlying type in the device_global.
1374  // * 4 bytes - 0 if device_global has device_image_scope and any value
1375  // otherwise.
1376  DeviceGlobalInfo.dropBytes(8);
1377  auto [TypeSize, DeviceImageScopeDecorated] =
1378  DeviceGlobalInfo.consume<std::uint32_t, std::uint32_t>();
1379  assert(DeviceGlobalInfo.empty() && "Extra data left!");
1380 
1381  // Give the image pointer as an identifier for the image the
1382  // device-global is associated with.
1383 
1384  auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1385  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1386  // If it has already been registered we update the information.
1387  ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1388  DeviceImageScopeDecorated);
1389  } else {
1390  // If it has not already been registered we create a new entry.
1391  // Note: Pointer to the device global is not available here, so it
1392  // cannot be set until registration happens.
1393  auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1394  DeviceGlobal->Name, Img.get(), TypeSize,
1395  DeviceImageScopeDecorated);
1396  m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1397  }
1398  }
1399  }
1400  // ... and initialize associated host_pipe information
1401  {
1402  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1403  auto HostPipes = Img->getHostPipes();
1404  for (const pi_device_binary_property &HostPipe : HostPipes) {
1405  ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray();
1406 
1407  // The supplied host_pipe info property is expected to contain:
1408  // * 8 bytes - Size of the property.
1409  // * 4 bytes - Size of the underlying type in the host_pipe.
1410  // Note: Property may be padded.
1411 
1412  HostPipeInfo.dropBytes(8);
1413  auto TypeSize = HostPipeInfo.consume<std::uint32_t>();
1414  assert(HostPipeInfo.empty() && "Extra data left!");
1415 
1416  auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1417  if (ExistingHostPipe != m_HostPipes.end()) {
1418  // If it has already been registered we update the information.
1419  ExistingHostPipe->second->initialize(TypeSize);
1420  ExistingHostPipe->second->initialize(Img.get());
1421  } else {
1422  // If it has not already been registered we create a new entry.
1423  // Note: Pointer to the host pipe is not available here, so it
1424  // cannot be set until registration happens.
1425  auto EntryUPtr =
1426  std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1427  EntryUPtr->initialize(Img.get());
1428  m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1429  }
1430  }
1431  }
1432  m_DeviceImages.insert(std::move(Img));
1433  }
1434 }
1435 
1437  for (const auto &ImgIt : m_BinImg2KernelIDs) {
1438  ImgIt.first->print();
1439  }
1440 }
1441 
1442 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img,
1443  uint32_t SequenceID) const {
1444  const char *Prefix = std::getenv("SYCL_DUMP_IMAGES_PREFIX");
1445  std::string Fname(Prefix ? Prefix : "sycl_");
1446  const pi_device_binary_struct &RawImg = Img.getRawData();
1447  Fname += RawImg.DeviceTargetSpec;
1448  if (SequenceID)
1449  Fname += '_' + std::to_string(SequenceID);
1450  std::string Ext;
1451 
1453  if (Format == PI_DEVICE_BINARY_TYPE_SPIRV)
1454  Ext = ".spv";
1455  else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1456  Ext = ".bc";
1457  else
1458  Ext = ".bin";
1459  Fname += Ext;
1460 
1461  std::ofstream F(Fname, std::ios::binary);
1462 
1463  if (!F.is_open()) {
1464  throw runtime_error("Can not write " + Fname, PI_ERROR_UNKNOWN);
1465  }
1466  Img.dump(F);
1467  F.close();
1468 }
1469 
1471  sycl::detail::pi::PiProgram NativePrg,
1472  const RTDeviceBinaryImage *Img) {
1473  if (DbgProgMgr > 2) {
1474  std::cerr << ">>> ProgramManager::flushSpecConstants(" << Prg.get()
1475  << ",...)\n";
1476  }
1477  if (!Prg.hasSetSpecConstants())
1478  return; // nothing to do
1479  pi::PiProgram PrgHandle = Prg.getHandleRef();
1480  // program_impl can't correspond to two different native programs
1481  assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1482  NativePrg = NativePrg ? NativePrg : PrgHandle;
1483 
1484  if (!Img) {
1485  // caller hasn't provided the image object - find it
1486  { // make sure NativePrograms map access is synchronized
1487  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1488  auto It = NativePrograms.find(NativePrg);
1489  if (It == NativePrograms.end())
1490  throw sycl::exception(
1491  sycl::errc::invalid,
1492  "spec constant is set in a program w/o a binary image");
1493  Img = It->second;
1494  }
1495  if (!Img->supportsSpecConstants()) {
1496  if (DbgProgMgr > 0)
1497  std::cerr << ">>> ProgramManager::flushSpecConstants: binary image "
1498  << &Img->getRawData() << " doesn't support spec constants\n";
1499  // This device binary image does not support runtime setting of
1500  // specialization constants; compiler must have generated default values.
1501  // NOTE: Can't throw here, as it would always take place with AOT
1502  //-compiled code. New Khronos 2020 spec should fix this inconsistency.
1503  return;
1504  }
1505  }
1506  Prg.flush_spec_constants(*Img, NativePrg);
1507 }
1508 
1509 // If the kernel is loaded from spv file, it may not include DeviceLib require
1510 // mask, sycl runtime won't know which fallback device libraries are needed. In
1511 // such case, the safest way is to load all fallback device libraries.
1513  const RTDeviceBinaryImage::PropertyRange &DLMRange =
1514  Img.getDeviceLibReqMask();
1515  if (DLMRange.isAvailable())
1516  return DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1517  else
1518  return 0xFFFFFFFF;
1519 }
1520 
1521 const KernelArgMask *
1523  const std::string &KernelName) {
1524  // Bail out if there are no eliminated kernel arg masks in our images
1525  if (m_EliminatedKernelArgMasks.empty())
1526  return nullptr;
1527 
1528  {
1529  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1530  auto ImgIt = NativePrograms.find(NativePrg);
1531  if (ImgIt != NativePrograms.end()) {
1532  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1533  if (MapIt != m_EliminatedKernelArgMasks.end()) {
1534  auto ArgMaskMapIt = MapIt->second.find(KernelName);
1535  if (ArgMaskMapIt != MapIt->second.end())
1536  return &MapIt->second[KernelName];
1537  }
1538  return nullptr;
1539  }
1540  }
1541 
1542  // If the program was not cached iterate over all available images looking for
1543  // the requested kernel
1544  for (auto &Elem : m_EliminatedKernelArgMasks) {
1545  auto ArgMask = Elem.second.find(KernelName);
1546  if (ArgMask != Elem.second.end())
1547  return &ArgMask->second;
1548  }
1549 
1550  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1551  return nullptr;
1552 }
1553 
1555  auto IsAOTBinary = [](const char *Format) {
1556  return (
1557  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1558  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1559  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1560  };
1561 
1562  // There are only two initial states so far - SPIRV which needs to be compiled
1563  // and linked and fully compiled(AOTed) binary
1564 
1565  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1566 
1567  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1568 }
1569 
1571  const device &Dev) {
1572  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1574  auto &Plugin = DeviceImpl->getPlugin();
1575 
1576  const sycl::detail::pi::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1577 
1578  // Call piextDeviceSelectBinary with only one image to check if an image is
1579  // compatible with implementation. The function returns invalid index if no
1580  // device images are compatible.
1581  pi_uint32 SuitableImageID = std::numeric_limits<pi_uint32>::max();
1582  pi_device_binary DevBin =
1583  const_cast<pi_device_binary>(&BinImage->getRawData());
1585  Plugin->call_nocheck<PiApiKind::piextDeviceSelectBinary>(
1586  PIDeviceHandle, &DevBin,
1587  /*num bin images = */ (pi_uint32)1, &SuitableImageID);
1588  if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1589  throw runtime_error("Invalid binary image or device",
1590  PI_ERROR_INVALID_VALUE);
1591 
1592  return (0 == SuitableImageID);
1593 }
1594 
1595 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1596  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1597 
1598  auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1599  if (KernelID == m_KernelName2KernelIDs.end())
1600  throw runtime_error("No kernel found with the specified name",
1601  PI_ERROR_INVALID_KERNEL_NAME);
1602 
1603  return KernelID->second;
1604 }
1605 
1607  std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1608 
1609  return std::any_of(
1610  m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1611  [&](std::pair<RTDeviceBinaryImage *,
1612  std::shared_ptr<std::vector<kernel_id>>>
1613  Elem) { return compatibleWithDevice(Elem.first, Dev); });
1614 }
1615 
1616 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1617  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1618 
1619  std::vector<sycl::kernel_id> AllKernelIDs;
1620  AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1621  for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1622  AllKernelIDs.push_back(KernelID.second);
1623  }
1624  return AllKernelIDs;
1625 }
1626 
1627 kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
1628  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1629 
1630  auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1631  if (KernelID == m_BuiltInKernelIDs.end()) {
1632  auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1633  auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1634  KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1635  }
1636 
1637  return KernelID->second;
1638 }
1639 
1640 void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
1641  const char *UniqueId) {
1642  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1643 
1644  auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1645  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1646  // Update the existing information and add the entry to the pointer map.
1647  ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1648  m_Ptr2DeviceGlobal.insert(
1649  {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1650  return;
1651  }
1652 
1653  auto EntryUPtr =
1654  std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1655  auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1656  m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1657 }
1658 
1659 std::set<RTDeviceBinaryImage *>
1660 ProgramManager::getRawDeviceImages(const std::vector<kernel_id> &KernelIDs) {
1661  std::set<RTDeviceBinaryImage *> BinImages;
1662  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1663  for (const kernel_id &KID : KernelIDs) {
1664  auto Range = m_KernelIDs2BinImage.equal_range(KID);
1665  for (auto It = Range.first, End = Range.second; It != End; ++It)
1666  BinImages.insert(It->second);
1667  }
1668  return BinImages;
1669 }
1670 
1672 ProgramManager::getDeviceGlobalEntry(const void *DeviceGlobalPtr) {
1673  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1674  auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1675  assert(Entry != m_Ptr2DeviceGlobal.end() && "Device global entry not found");
1676  return Entry->second;
1677 }
1678 
1679 std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
1680  const std::vector<std::string> &UniqueIds,
1681  bool ExcludeDeviceImageScopeDecorated) {
1682  std::vector<DeviceGlobalMapEntry *> FoundEntries;
1683  FoundEntries.reserve(UniqueIds.size());
1684 
1685  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1686  for (const std::string &UniqueId : UniqueIds) {
1687  auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1688  assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1689  "Device global not found in map.");
1690  if (!ExcludeDeviceImageScopeDecorated ||
1691  !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1692  FoundEntries.push_back(DeviceGlobalEntry->second.get());
1693  }
1694  return FoundEntries;
1695 }
1696 
1697 void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr,
1698  const char *UniqueId) {
1699  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1700 
1701  auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1702  if (ExistingHostPipe != m_HostPipes.end()) {
1703  ExistingHostPipe->second->initialize(HostPipePtr);
1704  m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1705  return;
1706  }
1707 
1708  auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1709  auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1710  m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1711 }
1712 
1714 ProgramManager::getHostPipeEntry(const std::string &UniqueId) {
1715  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1716  auto Entry = m_HostPipes.find(UniqueId);
1717  assert(Entry != m_HostPipes.end() && "Host pipe entry not found");
1718  return Entry->second.get();
1719 }
1720 
1722  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1723  auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1724  assert(Entry != m_Ptr2HostPipe.end() && "Host pipe entry not found");
1725  return Entry->second;
1726 }
1727 
1729  RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) {
1730  const bundle_state ImgState = getBinImageState(BinImage);
1731 
1732  assert(compatibleWithDevice(BinImage, Dev));
1733 
1734  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1735  // Collect kernel names for the image.
1736  {
1737  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1738  KernelIDs = m_BinImg2KernelIDs[BinImage];
1739  }
1740 
1741  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1742  BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1743  /*PIProgram=*/nullptr);
1744 
1745  return createSyclObjFromImpl<device_image_plain>(Impl);
1746 }
1747 
1748 std::vector<device_image_plain>
1750  const context &Ctx, const std::vector<device> &Devs,
1751  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {
1752 
1753  // Collect unique raw device images taking into account kernel ids passed
1754  // TODO: Can we avoid repacking?
1755  std::set<RTDeviceBinaryImage *> BinImages;
1756  if (!KernelIDs.empty()) {
1757  for (const auto &KID : KernelIDs) {
1758  bool isCompatibleWithAtLeastOneDev =
1759  std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) {
1760  return sycl::is_compatible({KID}, Dev);
1761  });
1762  if (!isCompatibleWithAtLeastOneDev)
1763  throw sycl::exception(
1765  "Kernel is incompatible with all devices in devs");
1766  }
1767  BinImages = getRawDeviceImages(KernelIDs);
1768  } else {
1769  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1770  for (auto &ImageUPtr : m_BinImg2KernelIDs) {
1771  BinImages.insert(ImageUPtr.first);
1772  }
1773  }
1774 
1775  // Ignore images with incompatible state. Image is considered compatible
1776  // with a target state if an image is already in the target state or can
1777  // be brought to target state by compiling/linking/building.
1778  //
1779  // Example: an image in "executable" state is not compatible with
1780  // "input" target state - there is no operation to convert the image it
1781  // to "input" state. An image in "input" state is compatible with
1782  // "executable" target state because it can be built to get into
1783  // "executable" state.
1784  for (auto It = BinImages.begin(); It != BinImages.end();) {
1785  if (getBinImageState(*It) > TargetState)
1786  It = BinImages.erase(It);
1787  else
1788  ++It;
1789  }
1790 
1791  std::vector<device_image_plain> SYCLDeviceImages;
1792 
1793  // If a non-input state is requested, we can filter out some compatible
1794  // images and return only those with the highest compatible state for each
1795  // device-kernel pair. This map tracks how many kernel-device pairs need each
1796  // image, so that any unneeded ones are skipped.
1797  // TODO this has no effect if the requested state is input, consider having
1798  // a separate branch for that case to avoid unnecessary tracking work.
1799  struct DeviceBinaryImageInfo {
1800  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1802  int RequirementCounter = 0;
1803  };
1804  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1805 
1806  for (const sycl::device &Dev : Devs) {
1807  // Track the highest image state for each requested kernel.
1808  using StateImagesPairT =
1809  std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1810  using KernelImageMapT =
1811  std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1812  KernelImageMapT KernelImageMap;
1813  if (!KernelIDs.empty())
1814  for (const kernel_id &KernelID : KernelIDs)
1815  KernelImageMap.insert({KernelID, {}});
1816 
1817  for (RTDeviceBinaryImage *BinImage : BinImages) {
1818  if (!compatibleWithDevice(BinImage, Dev) ||
1819  !doesDevSupportDeviceRequirements(Dev, *BinImage))
1820  continue;
1821 
1822  auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1823  DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1824  if (InsertRes.second) {
1825  ImgInfo.State = getBinImageState(BinImage);
1826  // Collect kernel names for the image
1827  {
1828  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1829  ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1830  }
1831  }
1832  const bundle_state ImgState = ImgInfo.State;
1833  const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1834  ImgInfo.KernelIDs;
1835  int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1836 
1837  // If the image does not contain any non-service kernels we can skip it.
1838  if (!ImageKernelIDs || ImageKernelIDs->empty())
1839  continue;
1840 
1841  // Update tracked information.
1842  for (kernel_id &KernelID : *ImageKernelIDs) {
1843  StateImagesPairT *StateImagesPair;
1844  // If only specific kernels are requested, ignore the rest.
1845  if (!KernelIDs.empty()) {
1846  auto It = KernelImageMap.find(KernelID);
1847  if (It == KernelImageMap.end())
1848  continue;
1849  StateImagesPair = &It->second;
1850  } else
1851  StateImagesPair = &KernelImageMap[KernelID];
1852 
1853  auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1854 
1855  if (KernelImages.empty()) {
1856  KernelImagesState = ImgState;
1857  KernelImages.push_back(BinImage);
1858  ++ImgRequirementCounter;
1859  } else if (KernelImagesState < ImgState) {
1860  for (RTDeviceBinaryImage *Img : KernelImages) {
1861  auto It = ImageInfoMap.find(Img);
1862  assert(It != ImageInfoMap.end());
1863  assert(It->second.RequirementCounter > 0);
1864  --(It->second.RequirementCounter);
1865  }
1866  KernelImages.clear();
1867  KernelImages.push_back(BinImage);
1868  KernelImagesState = ImgState;
1869  ++ImgRequirementCounter;
1870  } else if (KernelImagesState == ImgState) {
1871  KernelImages.push_back(BinImage);
1872  ++ImgRequirementCounter;
1873  }
1874  }
1875  }
1876  }
1877 
1878  for (const auto &ImgInfoPair : ImageInfoMap) {
1879  if (ImgInfoPair.second.RequirementCounter == 0)
1880  continue;
1881 
1882  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1883  ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1884  ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
1885 
1886  SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1887  }
1888 
1889  return SYCLDeviceImages;
1890 }
1891 
1892 void ProgramManager::bringSYCLDeviceImagesToState(
1893  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
1894 
1895  for (device_image_plain &DevImage : DeviceImages) {
1896  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
1897 
1898  switch (TargetState) {
1899  case bundle_state::input:
1900  // Do nothing since there is no state which can be upgraded to the input.
1901  assert(DevImageState == bundle_state::input);
1902  break;
1903  case bundle_state::object:
1904  if (DevImageState == bundle_state::input) {
1905  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1906  /*PropList=*/{});
1907  break;
1908  }
1909  // Device image is expected to be object state then.
1910  assert(DevImageState == bundle_state::object);
1911  break;
1912  case bundle_state::executable: {
1913  switch (DevImageState) {
1914  case bundle_state::input:
1915  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1916  /*PropList=*/{});
1917  break;
1918  case bundle_state::object: {
1919  std::vector<device_image_plain> LinkedDevImages =
1920  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
1921  /*PropList=*/{});
1922  // Since only one device image is passed here one output device image is
1923  // expected
1924  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
1925  DevImage = LinkedDevImages[0];
1926  break;
1927  }
1929  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1930  /*PropList=*/{});
1931  break;
1932  }
1933  break;
1934  }
1935  }
1936  }
1937 }
1938 
1939 std::vector<device_image_plain>
1940 ProgramManager::getSYCLDeviceImages(const context &Ctx,
1941  const std::vector<device> &Devs,
1942  bundle_state TargetState) {
1943  // Collect device images with compatible state
1944  std::vector<device_image_plain> DeviceImages =
1945  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1946  // Bring device images with compatible state to desired state.
1947  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1948  return DeviceImages;
1949 }
1950 
1951 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1952  const context &Ctx, const std::vector<device> &Devs,
1953  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
1954  // Collect device images with compatible state
1955  std::vector<device_image_plain> DeviceImages =
1956  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1957 
1958  // Filter out images that are rejected by Selector
1959  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1960  [&Selector](const device_image_plain &Image) {
1961  return !Selector(getSyclObjImpl(Image));
1962  });
1963  DeviceImages.erase(It, DeviceImages.end());
1964 
1965  // The spec says that the function should not call online compiler or linker
1966  // to translate device images into target state
1967  return DeviceImages;
1968 }
1969 
1970 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1971  const context &Ctx, const std::vector<device> &Devs,
1972  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1973  // Fast path for when no kernel IDs are requested
1974  if (KernelIDs.empty())
1975  return {};
1976 
1977  {
1978  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1979 
1980  for (auto &It : m_BuiltInKernelIDs) {
1981  if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1982  KernelIDs.end())
1984  "Attempting to use a built-in kernel. They are "
1985  "not fully supported");
1986  }
1987  }
1988 
1989  // Collect device images with compatible state
1990  std::vector<device_image_plain> DeviceImages =
1991  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1992 
1993  // Bring device images with compatible state to desired state.
1994  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1995  return DeviceImages;
1996 }
1997 
1998 static void
1999 setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
2001  const PluginPtr &Plugin) {
2002  // Set ITT annotation specialization constant if needed.
2003  enableITTAnnotationsIfNeeded(Prog, Plugin);
2004 
2005  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
2006  const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
2007  &SpecConstData = InputImpl->get_spec_const_data_ref();
2008  const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
2009 
2010  // Set all specialization IDs from descriptors in the input device image.
2011  for (const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
2012  std::ignore = SpecConstNames;
2013  for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) {
2014  if (SpecIDDesc.IsSet) {
2016  Prog, SpecIDDesc.ID, SpecIDDesc.Size,
2017  SpecConsts.data() + SpecIDDesc.BlobOffset);
2018  }
2019  }
2020  }
2021 }
2022 
2023 device_image_plain
2025  const std::vector<device> &Devs,
2026  const property_list &) {
2027 
2028  // TODO: Extract compile options from property list once the Spec clarifies
2029  // how they can be passed.
2030 
2031  // TODO: Probably we could have cached compiled device images.
2032  const std::shared_ptr<device_image_impl> &InputImpl =
2033  getSyclObjImpl(DeviceImage);
2034 
2035  const PluginPtr &Plugin =
2036  getSyclObjImpl(InputImpl->get_context())->getPlugin();
2037 
2038  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2039  if (InputImpl->get_bin_image_ref()->getFormat() !=
2041  Devs.size() > 1)
2042  sycl::runtime_error(
2043  "Creating a program from AOT binary for multiple device is not "
2044  "supported",
2045  PI_ERROR_INVALID_OPERATION);
2046 
2047  // Device is not used when creating program from SPIRV, so passing only one
2048  // device is OK.
2049  sycl::detail::pi::PiProgram Prog = createPIProgram(
2050  *InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs[0]);
2051 
2052  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2053  setSpecializationConstants(InputImpl, Prog, Plugin);
2054 
2055  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
2056  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2057  bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
2058  InputImpl->get_spec_const_data_ref(),
2059  InputImpl->get_spec_const_blob_ref());
2060 
2061  std::vector<pi_device> PIDevices;
2062  PIDevices.reserve(Devs.size());
2063  for (const device &Dev : Devs)
2064  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2065 
2066  // TODO: Handle zero sized Device list.
2067  std::string CompileOptions;
2068  applyCompileOptionsFromEnvironment(CompileOptions);
2070  CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2072  Plugin->call_nocheck<PiApiKind::piProgramCompile>(
2073  ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(),
2074  PIDevices.data(), CompileOptions.c_str(),
2075  /*num_input_headers=*/0, /*input_headers=*/nullptr,
2076  /*header_include_names=*/nullptr,
2077  /*pfn_notify=*/nullptr, /*user_data*/ nullptr);
2078  if (Error != PI_SUCCESS)
2079  throw sycl::exception(
2081  getProgramBuildLog(ObjectImpl->get_program_ref(),
2082  getSyclObjImpl(ObjectImpl->get_context())));
2083 
2084  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2085 }
2086 
2087 std::vector<device_image_plain>
2089  const std::vector<device> &Devs,
2090  const property_list &PropList) {
2091  (void)PropList;
2092 
2093  std::vector<pi_program> PIPrograms;
2094  PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref());
2095 
2096  std::vector<pi_device> PIDevices;
2097  PIDevices.reserve(Devs.size());
2098  for (const device &Dev : Devs)
2099  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2100 
2101  std::string LinkOptionsStr;
2102  applyLinkOptionsFromEnvironment(LinkOptionsStr);
2103  if (LinkOptionsStr.empty()) {
2104  const std::shared_ptr<device_image_impl> &InputImpl =
2105  getSyclObjImpl(DeviceImage);
2106  appendLinkOptionsFromImage(LinkOptionsStr,
2107  *(InputImpl->get_bin_image_ref()));
2108  }
2109  const context &Context = getSyclObjImpl(DeviceImage)->get_context();
2110  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2111  const PluginPtr &Plugin = ContextImpl->getPlugin();
2112 
2113  sycl::detail::pi::PiProgram LinkedProg = nullptr;
2115  Plugin->call_nocheck<PiApiKind::piProgramLink>(
2116  ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2117  /*options=*/LinkOptionsStr.c_str(), PIPrograms.size(),
2118  PIPrograms.data(),
2119  /*pfn_notify=*/nullptr,
2120  /*user_data=*/nullptr, &LinkedProg);
2121 
2122  if (Error != PI_SUCCESS) {
2123  if (LinkedProg) {
2124  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2125  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
2126  }
2127  Plugin->reportPiError(Error, "link()");
2128  }
2129 
2130  std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
2131  std::vector<unsigned char> NewSpecConstBlob;
2132  device_image_impl::SpecConstMapT NewSpecConstMap;
2133 
2134  std::shared_ptr<device_image_impl> DeviceImageImpl =
2135  getSyclObjImpl(DeviceImage);
2136 
2137  // Duplicates are not expected here, otherwise piProgramLink should fail
2138  KernelIDs->insert(KernelIDs->end(),
2139  DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2140  DeviceImageImpl->get_kernel_ids_ptr()->end());
2141 
2142  // To be able to answer queries about specialziation constants, the new
2143  // device image should have the specialization constants from all the linked
2144  // images.
2145  {
2146  const std::lock_guard<std::mutex> SpecConstLock(
2147  DeviceImageImpl->get_spec_const_data_lock());
2148 
2149  // Copy all map entries to the new map. Since the blob will be copied to
2150  // the end of the new blob we need to move the blob offset of each entry.
2151  for (const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2152  std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2153  NewSpecConstMap[SpecConstIt.first];
2154  assert(NewDescEntries.empty() &&
2155  "Specialization constant already exists in the map.");
2156  NewDescEntries.reserve(SpecConstIt.second.size());
2157  for (const device_image_impl::SpecConstDescT &SpecConstDesc :
2158  SpecConstIt.second) {
2159  device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc;
2160  NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size();
2161  NewDescEntries.push_back(std::move(NewSpecConstDesc));
2162  }
2163  }
2164 
2165  // Copy the blob from the device image into the new blob. This moves the
2166  // offsets of the following blobs.
2167  NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2168  DeviceImageImpl->get_spec_const_blob_ref().begin(),
2169  DeviceImageImpl->get_spec_const_blob_ref().end());
2170  }
2171 
2172  // device_image_impl expects kernel ids to be sorted for fast search
2173  std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});
2174 
2175  auto BinImg = getSyclObjImpl(DeviceImage)->get_bin_image_ref();
2176  DeviceImageImplPtr ExecutableImpl =
2177  std::make_shared<detail::device_image_impl>(
2178  BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs),
2179  LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2180 
2181  // TODO: Make multiple sets of device images organized by devices they are
2182  // compiled for.
2183  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2184 }
2185 
2186 // The function duplicates most of the code from existing getBuiltPIProgram.
2187 // The differences are:
2188 // Different API - uses different objects to extract required info
2189 // Supports caching of a program built for multiple devices
2191  const std::vector<device> &Devs,
2192  const property_list &PropList) {
2193  (void)PropList;
2194 
2195  const std::shared_ptr<device_image_impl> &InputImpl =
2196  getSyclObjImpl(DeviceImage);
2197 
2198  const context Context = InputImpl->get_context();
2199 
2200  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2201 
2202  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
2203 
2204  std::string CompileOpts;
2205  std::string LinkOpts;
2206  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
2207 
2208  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
2209  const RTDeviceBinaryImage &Img = *ImgPtr;
2210 
2211  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2212 
2213  // TODO: Unify this code with getBuiltPIProgram
2214  auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2215  &InputImpl, SpecConsts] {
2216  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2217  const PluginPtr &Plugin = ContextImpl->getPlugin();
2218  applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Plugin);
2219 
2220  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2221  if (InputImpl->get_bin_image_ref()->getFormat() !=
2223  Devs.size() > 1)
2224  sycl::runtime_error(
2225  "Creating a program from AOT binary for multiple device is not "
2226  "supported",
2227  PI_ERROR_INVALID_OPERATION);
2228 
2229  // Device is not used when creating program from SPIRV, so passing only one
2230  // device is OK.
2231  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2232  Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2233 
2234  if (!DeviceCodeWasInCache &&
2235  InputImpl->get_bin_image_ref()->supportsSpecConstants())
2236  setSpecializationConstants(InputImpl, NativePrg, Plugin);
2237 
2238  ProgramPtr ProgramManaged(
2239  NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
2240 
2241  // Link a fallback implementation of device libraries if they are not
2242  // supported by a device compiler.
2243  // Pre-compiled programs are supposed to be already linked.
2244  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
2245  // no fallback device library will be linked.
2246  uint32_t DeviceLibReqMask = 0;
2247  if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
2249  DeviceLibReqMask = getDeviceLibReqMask(Img);
2250 
2251  ProgramPtr BuiltProgram =
2252  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2253  getRawSyclObjImpl(Devs[0])->getHandleRef(), DeviceLibReqMask);
2254 
2255  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
2256 
2257  {
2258  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2259  NativePrograms[BuiltProgram.get()] = &Img;
2260  }
2261 
2262  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2263 
2264  // Save program to persistent cache if it is not there
2265  if (!DeviceCodeWasInCache)
2266  PersistentDeviceCodeCache::putItemToDisc(
2267  Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
2268 
2269  return BuiltProgram.release();
2270  };
2271 
2272  uint32_t ImgId = Img.getImageID();
2274  getRawSyclObjImpl(Devs[0])->getHandleRef();
2275  auto CacheKey =
2276  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
2277  std::make_pair(PiDevice, CompileOpts + LinkOpts));
2278 
2279  // CacheKey is captured by reference so when we overwrite it later we can
2280  // reuse this function.
2281  auto GetCachedBuildF = [&Cache, &CacheKey]() {
2282  return Cache.getOrInsertProgram(CacheKey);
2283  };
2284 
2285  // TODO: Throw SYCL2020 style exception
2286  auto BuildResult =
2287  getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
2288  Cache, GetCachedBuildF, BuildF);
2289  // getOrBuild is not supposed to return nullptr
2290  assert(BuildResult != nullptr && "Invalid build result");
2291 
2292  sycl::detail::pi::PiProgram ResProgram = *BuildResult->Ptr.load();
2293 
2294  // Cache supports key with once device only, but here we have multiple
2295  // devices a program is built for, so add the program to the cache for all
2296  // other devices.
2297  const PluginPtr &Plugin = ContextImpl->getPlugin();
2298  auto CacheOtherDevices = [ResProgram, &Plugin]() {
2299  Plugin->call<PiApiKind::piProgramRetain>(ResProgram);
2300  return ResProgram;
2301  };
2302 
2303  // The program for device "0" is already added to the cache during the first
2304  // call to getOrBuild, so starting with "1"
2305  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2306  const sycl::detail::pi::PiDevice PiDeviceAdd =
2307  getRawSyclObjImpl(Devs[Idx])->getHandleRef();
2308 
2309  // Change device in the cache key to reduce copying of spec const data.
2310  CacheKey.second.first = PiDeviceAdd;
2311  getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
2312  Cache, GetCachedBuildF, CacheOtherDevices);
2313  // getOrBuild is not supposed to return nullptr
2314  assert(BuildResult != nullptr && "Invalid build result");
2315  }
2316 
2317  // devive_image_impl shares ownership of PIProgram with, at least, program
2318  // cache. The ref counter will be descremented in the destructor of
2319  // device_image_impl
2320  Plugin->call<PiApiKind::piProgramRetain>(ResProgram);
2321 
2322  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2323  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2324  InputImpl->get_kernel_ids_ptr(), ResProgram,
2325  InputImpl->get_spec_const_data_ref(),
2326  InputImpl->get_spec_const_blob_ref());
2327 
2328  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2329 }
2330 
2331 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
2332 ProgramManager::getOrCreateKernel(const context &Context,
2333  const std::string &KernelName,
2334  const property_list &PropList,
2335  sycl::detail::pi::PiProgram Program) {
2336 
2337  (void)PropList;
2338 
2339  const ContextImplPtr Ctx = getSyclObjImpl(Context);
2340 
2341  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
2342 
2343  auto BuildF = [this, &Program, &KernelName, &Ctx] {
2344  sycl::detail::pi::PiKernel Kernel = nullptr;
2345 
2346  const PluginPtr &Plugin = Ctx->getPlugin();
2347  Plugin->call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
2348  &Kernel);
2349 
2351  sizeof(pi_bool), &PI_TRUE);
2352 
2353  // Ignore possible m_UseSpvFile for now.
2354  // TODO consider making m_UseSpvFile interact with kernel bundles as well.
2355  const KernelArgMask *KernelArgMask =
2356  getEliminatedKernelArgMask(Program, KernelName);
2357 
2358  return std::make_pair(Kernel, KernelArgMask);
2359  };
2360 
2361  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2362  return Cache.getOrInsertKernel(Program, KernelName);
2363  };
2364 
2365  auto BuildResult =
2366  getOrBuild<KernelProgramCache::KernelArgMaskPairT, invalid_object_error>(
2367  Cache, GetCachedBuildF, BuildF);
2368  // getOrBuild is not supposed to return nullptr
2369  assert(BuildResult != nullptr && "Invalid build result");
2370  return std::make_tuple(BuildResult->Ptr.load()->first,
2371  &(BuildResult->MBuildResultMutex),
2372  BuildResult->Ptr.load()->second);
2373 }
2374 
2376  const RTDeviceBinaryImage &Img) {
2377  return !checkDevSupportDeviceRequirements(Dev, Img).has_value();
2378 }
2379 
2380 static std::string getAspectNameStr(sycl::aspect AspectNum) {
2381 #define __SYCL_ASPECT(ASPECT, ID) \
2382  case aspect::ASPECT: \
2383  return #ASPECT;
2384 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2385 // We don't need "case aspect::usm_allocator" here because it will duplicate
2386 // "case aspect::usm_system_allocations", therefore leave this macro empty
2387 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2388  switch (AspectNum) {
2389 #include <sycl/info/aspects.def>
2390 #include <sycl/info/aspects_deprecated.def>
2391  }
2393  "Unknown aspect " +
2394  std::to_string(static_cast<unsigned>(AspectNum)));
2395 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2396 #undef __SYCL_ASPECT_DEPRECATED
2397 #undef __SYCL_ASPECT
2398 }
2399 
2400 // Check if the multiplication over unsigned integers overflows
2401 template <typename T>
2402 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2404  if (y == 0)
2405  return 0;
2406  if (x > std::numeric_limits<T>::max() / y)
2407  return {};
2408  else
2409  return x * y;
2410 }
2411 
2412 std::optional<sycl::exception>
2414  const RTDeviceBinaryImage &Img) {
2415  auto getPropIt = [&Img](const std::string &PropName) {
2416  const RTDeviceBinaryImage::PropertyRange &PropRange =
2417  Img.getDeviceRequirements();
2419  PropRange.begin(), PropRange.end(),
2421  return (*Prop)->Name == PropName;
2422  });
2423  return (PropIt == PropRange.end())
2424  ? std::nullopt
2425  : std::optional<
2427  };
2428 
2429  auto AspectsPropIt = getPropIt("aspects");
2430  auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size");
2431  auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t");
2432  auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size");
2433 
2434  // Checking if device supports defined aspects
2435  if (AspectsPropIt) {
2436  ByteArray Aspects =
2437  DeviceBinaryProperty(*(AspectsPropIt.value())).asByteArray();
2438  // Drop 8 bytes describing the size of the byte array.
2439  Aspects.dropBytes(8);
2440  while (!Aspects.empty()) {
2441  aspect Aspect = Aspects.consume<aspect>();
2442  if (!Dev.has(Aspect))
2444  "Required aspect " + getAspectNameStr(Aspect) +
2445  " is not supported on the device");
2446  }
2447  }
2448 
2449  // Checking if device supports defined required work group size
2450  if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2452  // stores its values as uint32_t, but this needed to be expanded to
2453  // uint64_t. However, this change did not happen in ABI-breaking
2454  // window, so we attach the required work-group size as the
2455  // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
2456  // window, we can remove the logic for the 32 bit property.
2457  bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
2458  auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2459 
2460  ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray();
2461  // Drop 8 bytes describing the size of the byte array.
2462  ReqdWGSize.dropBytes(8);
2463  uint64_t ReqdWGSizeAllDimsTotal = 1;
2464  std::vector<uint64_t> ReqdWGSizeVec;
2465  int Dims = 0;
2466  while (!ReqdWGSize.empty()) {
2467  uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume<uint64_t>()
2468  : ReqdWGSize.consume<uint32_t>();
2469  if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal,
2470  SingleDimSize))
2471  ReqdWGSizeAllDimsTotal = *res;
2472  else
2473  return sycl::exception(
2474  sycl::errc::kernel_not_supported,
2475  "Required work-group size is not supported"
2476  " (total number of work-items requested can't fit into size_t)");
2477  ReqdWGSizeVec.push_back(SingleDimSize);
2478  Dims++;
2479  }
2480 
2481  // The SingleDimSize was computed in an uint64_t; size_t does not
2482  // necessarily have to be the same uint64_t (but should fit in an
2483  // uint64_t).
2484  if (ReqdWGSizeAllDimsTotal >
2485  Dev.get_info<info::device::max_work_group_size>())
2486  return sycl::exception(sycl::errc::kernel_not_supported,
2487  "Required work-group size " +
2488  std::to_string(ReqdWGSizeAllDimsTotal) +
2489  " is not supported on the device");
2490  // Creating std::variant to call max_work_item_sizes one time to avoid
2491  // performance drop
2492  std::variant<id<1>, id<2>, id<3>> MaxWorkItemSizesVariant;
2493  if (Dims == 1)
2494  MaxWorkItemSizesVariant =
2496  else if (Dims == 2)
2497  MaxWorkItemSizesVariant =
2499  else // (Dims == 3)
2500  MaxWorkItemSizesVariant =
2502  for (int i = 0; i < Dims; i++) {
2503  // Extracting value from std::variant to avoid dealing with type-safety
2504  // issues after that
2505  if (Dims == 1) {
2506  // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
2507  if (ReqdWGSizeVec[i] >
2508  std::get<id<1>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2509  return sycl::exception(sycl::errc::kernel_not_supported,
2510  "Required work-group size " +
2511  std::to_string(ReqdWGSizeVec[i]) +
2512  " is not supported");
2513  } else if (Dims == 2) {
2514  if (ReqdWGSizeVec[i] >
2515  std::get<id<2>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2516  return sycl::exception(sycl::errc::kernel_not_supported,
2517  "Required work-group size " +
2518  std::to_string(ReqdWGSizeVec[i]) +
2519  " is not supported");
2520  } else // (Dims == 3)
2521  if (ReqdWGSizeVec[i] >
2522  std::get<id<3>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2523  return sycl::exception(sycl::errc::kernel_not_supported,
2524  "Required work-group size " +
2525  std::to_string(ReqdWGSizeVec[i]) +
2526  " is not supported");
2527  }
2528  }
2529 
2530  // Check if device supports required sub-group size.
2531  if (ReqdSubGroupSizePropIt) {
2532  auto ReqdSubGroupSize =
2533  DeviceBinaryProperty(*(ReqdSubGroupSizePropIt.value())).asUint32();
2534  auto SupportedSubGroupSizes = Dev.get_info<info::device::sub_group_sizes>();
2535  // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
2536  // as ESIMD images have a reqd-sub-group-size of 1, but currently
2537  // no backend currently includes 1 as a valid sub-group size.
2538  // This can be removed if backends add 1 as a valid sub-group size.
2539  if (!getUint32PropAsBool(Img, "isEsimdImage") &&
2540  std::none_of(SupportedSubGroupSizes.cbegin(),
2541  SupportedSubGroupSizes.cend(),
2542  [=](auto s) { return s == ReqdSubGroupSize; }))
2543  return sycl::exception(sycl::errc::kernel_not_supported,
2544  "Sub-group size " +
2545  std::to_string(ReqdSubGroupSize) +
2546  " is not supported on the device");
2547  }
2548 
2549  return {};
2550 }
2551 
2552 } // namespace detail
2553 } // namespace _V1
2554 } // namespace sycl
2555 
2557  sycl::detail::ProgramManager::getInstance().addImages(desc);
2558 }
2559 
2560 // Executed as a part of current module's (.exe, .dll) static initialization
2562  (void)desc;
2563  // TODO implement the function
2564 }
sycl::_V1::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:730
sycl::_V1::detail::ProgramManager::flushSpecConstants
void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg=nullptr, const RTDeviceBinaryImage *Img=nullptr)
Resolves given program to a device binary image and requests the program to flush constants the image...
Definition: program_manager.cpp:1470
sycl::_V1::detail::ProgramManager::getHostPipeEntry
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
Definition: program_manager.cpp:1714
sycl::_V1::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: helpers.cpp:22
PI_DEVICE_BINARY_TYPE_NONE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
Definition: pi.h:876
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_cuda.cpp:335
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:31
aspects.hpp
sycl::_V1::detail::emitBuiltProgramInfo
static void emitBuiltProgramInfo(const pi_program &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
Definition: program_manager.cpp:564
sycl::_V1::detail::program_impl::getHandleRef
sycl::detail::pi::PiProgram & getHandleRef()
Definition: program_impl.hpp:134
event_impl.hpp
_pi_offload_entry_struct
Definition: pi.h:833
sycl::_V1::detail::PersistentDeviceCodeCache::getItemFromDisc
static std::vector< std::vector< char > > getItemFromDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString)
Definition: persistent_device_code_cache.cpp:150
sycl::_V1::detail::loadDeviceLibFallback
static sycl::detail::pi::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const sycl::detail::pi::PiDevice &Device, bool UseNativeLib)
Definition: program_manager.cpp:897
sycl::_V1::detail::getRawSyclObjImpl
std::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
Definition: impl_utils.hpp:40
sycl::_V1::detail::pi::PiDeviceBinaryType
::pi_device_binary_type PiDeviceBinaryType
Definition: pi.hpp:134
sycl::_V1::backend
backend
Definition: backend_types.hpp:18
sycl::_V1::detail::ProgramManager::ProgramManager
ProgramManager()
Definition: program_manager.cpp:940
sycl::_V1::detail::RTDeviceBinaryImage::getRawData
const pi_device_binary_struct & getRawData() const
Definition: device_binary_image.hpp:151
sycl::_V1::detail::device_image_impl::SpecConstMapT
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
Definition: device_image_impl.hpp:56
sycl::_V1::detail::OSUtil::getCurrentDSODir
static std::string getCurrentDSODir()
Returns an absolute path to a directory where the object was found.
pi_bool
pi_uint32 pi_bool
Definition: pi.h:196
context_impl.hpp
sycl::_V1::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:648
sycl::_V1::detail::device_image_impl::SpecConstDescT
Definition: device_image_impl.hpp:48
pi_device_binaries_struct::DeviceBinaries
pi_device_binary DeviceBinaries
Device binaries data.
Definition: pi.h:1064
sycl::_V1::detail::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:33
sycl::_V1::detail::RTDeviceBinaryImage::getSize
size_t getSize() const
Definition: device_binary_image.hpp:156
PI_DEVICE_BINARY_TYPE_SPIRV
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
Definition: pi.h:881
type_traits.hpp
PI_DEVICE_INFO_NAME
@ PI_DEVICE_INFO_NAME
Definition: pi.h:348
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex
@ cl_intel_devicelib_complex
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:94
sycl::_V1::detail::doesDevSupportDeviceRequirements
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:2375
pi_device_binary_struct::DeviceTargetSpec
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:979
config.hpp
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:1406
sycl::_V1::detail::ProgramManager::build
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
Definition: program_manager.cpp:2190
sycl::_V1::detail::DeviceLibExtensionStrs
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
Definition: program_manager.cpp:873
sycl::_V1::backend::ext_oneapi_level_zero
@ ext_oneapi_level_zero
PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: pi.h:883
sycl::_V1::detail::createSpirvProgram
static sycl::detail::pi::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
Definition: program_manager.cpp:102
stl.hpp
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceLibReqMask
const PropertyRange & getDeviceLibReqMask() const
Definition: device_binary_image.hpp:208
sycl::_V1::detail::ProgramManager::getBuiltInKernelID
kernel_id getBuiltInKernelID(const std::string &KernelName)
Definition: program_manager.cpp:1627
sycl::_V1::detail::ProgramManager::getOrCreateKernel
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const program_impl *Prg)
Definition: program_manager.cpp:689
sycl::_V1::errc::feature_not_supported
@ feature_not_supported
sycl::_V1::detail::DeviceImageImplPtr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
Definition: kernel_bundle.hpp:79
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_cuda.cpp:314
sycl::_V1::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:683
sycl::_V1::detail::ProgramManager::getBuiltPIProgram
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const program_impl *Prg=nullptr, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
Definition: program_manager.cpp:573
sycl::_V1::detail::ProgramManager::getEliminatedKernelArgMask
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
Definition: program_manager.cpp:1522
sycl::_V1::detail::checkDevSupportDeviceRequirements
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:2413
sycl::_V1::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:65
sycl::_V1::detail::getDeviceLibFilename
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
Definition: program_manager.cpp:859
sycl::_V1::device::has
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect)
Indicates if the SYCL device has the given feature.
Definition: device.cpp:209
pi_device_binary_struct::BinaryStart
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: pi.h:991
_pi_result
_pi_result
Definition: pi.h:205
sycl::_V1::detail::ByteArray::empty
bool empty() const
Definition: device_binary_image.hpp:48
sycl::_V1::detail::GlobalHandler::instance
static GlobalHandler & instance()
Definition: global_handler.cpp:125
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceRequirements
const PropertyRange & getDeviceRequirements() const
Definition: device_binary_image.hpp:216
sycl::_V1::context::get_devices
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:139
sycl::_V1::detail::DeviceBinaryProperty::asByteArray
ByteArray asByteArray() const
Definition: device_binary_image.cpp:69
sycl::_V1::errc::kernel_argument
@ kernel_argument
common.hpp
sycl::_V1::detail::loadDeviceLib
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, sycl::detail::pi::PiProgram &Prog)
Definition: program_manager.cpp:813
sycl::_V1::detail::appendCompileOptionsForGRFSizeProperties
static void appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage)
Definition: program_manager.cpp:368
sycl::_V1::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:131
sycl::_V1::bundle_state::object
@ object
sycl::_V1::detail::SYCLConfig
Definition: config.hpp:111
sycl::_V1::errc::build
@ build
sycl::_V1::detail::program_impl::get
cl_program get() const
Returns a valid cl_program instance.
Definition: program_impl.cpp:217
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:119
sycl::_V1::detail::KernelProgramCache::KernelArgMaskPairT
std::pair< sycl::detail::pi::PiKernel, const KernelArgMask * > KernelArgMaskPairT
Definition: kernel_program_cache.hpp:92
os_util.hpp
sycl::_V1::detail::DeviceGlobalMapEntry
Definition: device_global_map_entry.hpp:52
sycl::_V1::detail::program_impl::get_build_options
std::string get_build_options() const
Returns the compile, link, or build options, from whichever of those operations was performed most re...
Definition: program_impl.hpp:272
sycl::_V1::detail::KernelProgramCache::saveKernel
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
Definition: kernel_program_cache.hpp:192
detail
---— Error handling, matching OpenCL plugin semantics.
Definition: common.hpp:44
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:1056
pi_device_binary_struct::BinaryEnd
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: pi.h:993
sycl::_V1::detail::multiply_with_overflow_check
static std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > multiply_with_overflow_check(T x, T y)
Definition: program_manager.cpp:2403
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf
@ cl_intel_devicelib_imf
sycl::_V1::detail::KernelProgramCache::getOrInsertKernel
std::pair< BuildResult< KernelArgMaskPairT > *, bool > getOrInsertKernel(sycl::detail::pi::PiProgram Program, const std::string &KernelName)
Definition: kernel_program_cache.hpp:143
sycl::_V1::info::device::max_work_item_sizes
Definition: info_desc.hpp:116
sycl
Definition: access.hpp:18
sycl::_V1::detail::applyCompileOptionsFromEnvironment
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
Definition: program_manager.cpp:510
sycl::_V1::detail::PiInfoCode
Definition: info_desc_helpers.hpp:25
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf_bf16
@ cl_intel_devicelib_imf_bf16
sycl::_V1::detail::program_impl::hasSetSpecConstants
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
Definition: program_impl.hpp:294
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1532
device_impl.hpp
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:451
sycl::_V1::detail::make_tuple
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
queue_impl.hpp
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::size
size_t size() const
Definition: device_binary_image.hpp:116
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:907
PI_KERNEL_INFO_PROGRAM
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:482
sycl::_V1::detail::ProgramManager::addImages
void addImages(pi_device_binaries DeviceImages)
Definition: program_manager.cpp:1281
sycl::_V1::detail::KernelProgramCache::BuildState
BuildState
Denotes the state of a build.
Definition: kernel_program_cache.hpp:46
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:20
sycl::_V1::backend::ext_oneapi_cuda
@ ext_oneapi_cuda
_pi_kernel
Definition: pi_cuda.hpp:72
sycl::_V1::exception::get_cl_code
cl_int get_cl_code() const
Definition: exception.cpp:87
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::ConstIterator
Definition: device_binary_image.hpp:90
std::clog
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
sycl::_V1::detail::enableITTAnnotationsIfNeeded
static void enableITTAnnotationsIfNeeded(const sycl::detail::pi::PiProgram &Prog, const PluginPtr &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
Definition: program_manager.cpp:58
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:266
sycl::_V1::detail::DeviceBinaryProperty::asUint32
pi_uint32 asUint32() const
Definition: device_binary_image.cpp:61
sycl::_V1::detail::ProgramManager::getDeviceImage
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
Definition: program_manager.cpp:1021
sycl::_V1::detail::ByteArray
Definition: device_binary_image.hpp:23
sycl::_V1::detail::isDeviceLibRequired
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:1110
sycl::_V1::detail::ProgramManager::getInstance
static ProgramManager & getInstance()
Definition: program_manager.cpp:67
sycl::_V1::detail::DeviceBinaryProperty
Definition: device_binary_image.hpp:64
sycl::_V1::detail::getDeviceLibExtensionStr
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
Definition: program_manager.cpp:888
kernel_properties.hpp
sycl::_V1::detail::ProgramManager::getSYCLKernelID
kernel_id getSYCLKernelID(const std::string &KernelName)
Definition: program_manager.cpp:1595
sycl::_V1::detail::getFormatStr
static const char * getFormatStr(sycl::detail::pi::PiDeviceBinaryType Format)
Definition: program_manager.cpp:252
sycl::_V1::detail::isDeviceBinaryTypeSupported
static bool isDeviceBinaryTypeSupported(const context &C, sycl::detail::pi::PiDeviceBinaryType Format)
Definition: program_manager.cpp:209
sycl::_V1::detail::RTDeviceBinaryImage::getLinkOptions
const char * getLinkOptions() const
Definition: device_binary_image.hpp:166
sycl::_V1::detail::RTDeviceBinaryImage::getDeviceGlobals
const PropertyRange & getDeviceGlobals() const
Definition: device_binary_image.hpp:215
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
sycl::_V1::detail::LessByHash
Definition: device_image_impl.hpp:35
sycl::_V1::detail::DbgProgMgr
static constexpr int DbgProgMgr
Definition: program_manager.cpp:51
pi_uint32
uint32_t pi_uint32
Definition: pi.h:194
sycl::_V1::detail::RTDeviceBinaryImage::supportsSpecConstants
bool supportsSpecConstants() const
Definition: device_binary_image.hpp:147
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::end
ConstIterator end() const
Definition: device_binary_image.hpp:115
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:959
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2556
program_impl.hpp
platform_impl.hpp
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math
@ cl_intel_devicelib_math
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_bfloat16
@ cl_intel_devicelib_bfloat16
sycl::_V1::detail::compatibleWithDevice
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
Definition: program_manager.cpp:1570
pi_device_binaries_struct::NumDeviceBinaries
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: pi.h:1062
sycl::_V1::detail::ProgramManager::getDeviceGlobalEntries
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
Definition: program_manager.cpp:1679
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_cuda.cpp:247
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_cuda.cpp:1021
PI_DEVICE_BINARY_TYPE_NATIVE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
Definition: pi.h:878
sycl::_V1::detail::applyOptionsFromImage
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const PluginPtr &Plugin)
Definition: program_manager.cpp:501
sycl::_V1::detail::ProgramManager::getOrCreatePIProgram
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
Definition: program_manager.cpp:537
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_assert
@ cl_intel_devicelib_assert
sycl::_V1::detail::getBinImageFromMultiMap
RTDeviceBinaryImage * getBinImageFromMultiMap(const std::unordered_multimap< StorageKey, RTDeviceBinaryImage * > &ImagesSet, const StorageKey &Key, const context &Context, const device &Device)
Definition: program_manager.cpp:996
sycl::_V1::detail::KernelArgMask
std::vector< bool > KernelArgMask
Definition: kernel_arg_mask.hpp:16
sycl::_V1::detail::ProgramManager::getDeviceLibReqMask
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:1512
sycl::_V1::detail::RTDeviceBinaryImage::getCompileOptions
const char * getCompileOptions() const
Definition: device_binary_image.hpp:161
sycl::_V1::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:51
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:73
sycl::_V1::exception
Definition: exception.hpp:68
sycl::_V1::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:137
std::cerr
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
sycl::_V1::detail::ProgramManager::getDeviceImageFromBinaryImage
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
Definition: program_manager.cpp:1728
sycl::_V1::detail::ProgramManager::hasCompatibleImage
bool hasCompatibleImage(const device &Dev)
Definition: program_manager.cpp:1606
global_handler.hpp
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:59
device.hpp
sycl::_V1::detail::KernelProgramCache::notifyAllBuild
void notifyAllBuild(BuildResult< T > &BR) const
Definition: kernel_program_cache.hpp:177
sycl::_V1::detail::UseSpvEnv
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
none_of
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
_pi_device_binary_property_struct
Definition: pi.h:855
_pi_program
Definition: pi_cuda.hpp:68
sycl::_V1::detail::GlobalHandler::getProgramManager
ProgramManager & getProgramManager()
Definition: global_handler.cpp:177
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf_fp64
@ cl_intel_devicelib_imf_fp64
sycl::_V1::detail::ProgramManager::addOrInitHostPipeEntry
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
Definition: program_manager.cpp:1697
sycl::_V1::detail::ProgramManager::kernelUsesAssert
bool kernelUsesAssert(const std::string &KernelName) const
Definition: program_manager.cpp:1277
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_complex_fp64
sycl::_V1::detail::RTDeviceBinaryImage::getProgramMetadata
const PropertyRange & getProgramMetadata() const
Definition: device_binary_image.hpp:213
program_manager.hpp
sycl::_V1::bundle_state::input
@ input
sycl::_V1::errc::invalid
@ invalid
sycl::_V1::detail::RTDeviceBinaryImage::dump
virtual void dump(std::ostream &Out) const
Definition: device_binary_image.cpp:134
sycl::_V1::detail::ProgramManager::getPiProgramFromPiKernel
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
Definition: program_manager.cpp:755
sycl::_V1::detail::pi::PiKernel
::pi_kernel PiKernel
Definition: pi.hpp:138
persistent_device_code_cache.hpp
sycl::_V1::detail::RTDeviceBinaryImage::getProperty
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
Definition: device_binary_image.cpp:140
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::begin
ConstIterator begin() const
Definition: device_binary_image.hpp:114
sycl::_V1::detail::applyLinkOptionsFromEnvironment
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
Definition: program_manager.cpp:520
sycl::_V1::detail::getDeviceLibPrograms
static std::vector< sycl::detail::pi::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:1118
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2561
sycl::_V1::detail::ProgramManager::getSYCLDeviceImagesWithCompatibleState
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
Definition: program_manager.cpp:1749
all_of
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
sycl::_V1::detail::ProgramManager::createPIProgram
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
Definition: program_manager.cpp:268
sycl::_V1::detail::appendCompileOptionsFromImage
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const PluginPtr &)
Definition: program_manager.cpp:412
sycl::_V1::errc::kernel_not_supported
@ kernel_not_supported
device_image_impl.hpp
sycl::_V1::backend::opencl
@ opencl
sycl::_V1::detail::device_image_plain
Definition: kernel_bundle.hpp:83
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:384
sycl::_V1::detail::DeviceLibNames
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
Definition: program_manager.cpp:837
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:360
sycl::_V1::detail::pi::getBinaryImageFormat
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: pi.cpp:674
backend_types.hpp
sycl::_V1::detail::getOrBuild
KernelProgramCache::BuildResult< RetT > * getOrBuild(KernelProgramCache &KPCache, GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
Definition: program_manager.cpp:133
sycl::_V1::device::get_info
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.cpp:136
sycl::_V1::detail::OSUtil::DirSep
static constexpr const char * DirSep
Definition: os_util.hpp:55
sycl::_V1::detail::ByteArray::consume
auto consume()
Definition: device_binary_image.hpp:33
sycl::_V1::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:72
sycl::_V1::detail::RTDeviceBinaryImage::getImageID
std::uintptr_t getImageID() const
Definition: device_binary_image.hpp:221
exception.hpp
sycl::_V1::detail::ProgramManager::getProgramBuildLog
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
Definition: program_manager.cpp:766
sycl::_V1::detail::applyOptionsFromEnvironment
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
Definition: program_manager.cpp:529
sycl::_V1::detail::HostPipeMapEntry
Definition: host_pipe_map_entry.hpp:19
sycl::_V1::context::get_platform
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:135
sycl::_V1::detail::ProgramManager::getDeviceGlobalEntry
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
Definition: program_manager.cpp:1672
sycl::_V1::detail::createBinaryProgram
static sycl::detail::pi::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
Definition: program_manager.cpp:72
sycl::_V1::detail::program_impl::stableSerializeSpecConstRegistry
void stableSerializeSpecConstRegistry(SerializedObj &Dst) const
Definition: program_impl.hpp:289
sycl::_V1::detail::ByteArray::dropBytes
void dropBytes(std::size_t Bytes)
Definition: device_binary_image.hpp:40
sycl::_V1::detail::setSpecializationConstants
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, sycl::detail::pi::PiProgram Prog, const PluginPtr &Plugin)
Definition: program_manager.cpp:1999
sycl::_V1::detail::ProgramManager::addOrInitDeviceGlobalEntry
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
Definition: program_manager.cpp:1640
sycl::_V1::detail::register_alloc_mode_enum::large
@ large
sycl::_V1::detail::getUint32PropAsBool
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
Definition: program_manager.cpp:347
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_cuda.cpp:242
sycl::_V1::detail::appendLinkOptionsFromImage
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:333
sycl::_V1::detail::getAspectNameStr
static std::string getAspectNameStr(sycl::aspect AspectNum)
Definition: program_manager.cpp:2380
sycl::_V1::detail::program_impl
Definition: program_impl.hpp:38
sycl::_V1::detail::getUint32PropAsOptStr
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, const char *PropName)
Definition: program_manager.cpp:353
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:304
sycl::_V1::detail::register_alloc_mode_enum::automatic
@ automatic
context.hpp
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange::isAvailable
bool isAvailable() const
Definition: device_binary_image.hpp:118
any_of
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
util.hpp
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_cuda.cpp:88
sycl::_V1::detail::RTDeviceBinaryImage::print
virtual void print() const
Definition: device_binary_image.cpp:100
sycl::_V1::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:68
sycl::_V1::detail::KernelProgramCache::getOrInsertProgram
std::pair< ProgramWithBuildStateT *, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
Definition: kernel_program_cache.hpp:125
sycl::_V1::detail::CheckJITCompilationForImage
void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, bool JITCompilationIsRequired)
Definition: program_manager.cpp:978
sycl::_V1::detail::ProgramManager
Definition: program_manager.hpp:87
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_math_fp64
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:906
sycl::_V1::detail::ProgramManager::debugPrintBinaryImages
void debugPrintBinaryImages() const
Definition: program_manager.cpp:1436
sycl::_V1::detail::KernelProgramCache::waitUntilBuilt
void waitUntilBuilt(BuildResult< T > &BR, Predicate Pred) const
Definition: kernel_program_cache.hpp:154
pi_device_binary_struct::EntriesEnd
_pi_offload_entry EntriesEnd
Definition: pi.h:996
sycl::_V1::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
PI_PROGRAM_INFO_DEVICES
@ PI_PROGRAM_INFO_DEVICES
Definition: pi.h:440
sycl::_V1::exception::what
const char * what() const noexcept final
Definition: exception.cpp:76
sycl::_V1::detail::ProgramManager::getAllSYCLKernelIDs
std::vector< kernel_id > getAllSYCLKernelIDs()
Definition: program_manager.cpp:1616
sycl::_V1::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:60
spec_constant_impl.hpp
sycl::_V1::detail::PersistentDeviceCodeCache::putItemToDisc
static void putItemToDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, const sycl::detail::pi::PiProgram &NativePrg)
Definition: persistent_device_code_cache.cpp:87
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_cuda.cpp:1028
sycl::_V1::detail::KernelProgramCache
Definition: kernel_program_cache.hpp:34
sycl::_V1::detail::program_impl::flush_spec_constants
void flush_spec_constants(const RTDeviceBinaryImage &Img, sycl::detail::pi::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
Definition: program_impl.cpp:470
sycl::_V1::detail::createKernelArgMask
KernelArgMask createKernelArgMask(const ByteArray &Bytes)
Definition: kernel_arg_mask.hpp:17
sycl::_V1::detail::RTDeviceBinaryImage::PropertyRange
Definition: device_binary_image.hpp:86
sycl::_V1::detail::device_image_impl::SpecConstDescT::BlobOffset
unsigned int BlobOffset
Definition: device_image_impl.hpp:52
sycl::_V1::platform::get_info
detail::is_platform_info_desc< Param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.cpp:61
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_cstring
@ cl_intel_devicelib_cstring
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:627
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
sycl::_V1::detail::RTDeviceBinaryImage::get
pi_device_binary get() const
Definition: device_binary_image.hpp:228
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:905
sycl::_V1::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:82
sycl::_V1::bundle_state::executable
@ executable
sycl::_V1::detail::SYCLConfig::get
static const char * get()
Definition: config.hpp:115
sycl::_V1::detail::RTDeviceBinaryImage::getFormat
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
Definition: device_binary_image.hpp:172
sycl::_V1::detail::ProgramManager::getRawDeviceImages
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
Definition: program_manager.cpp:1660
PI_PROGRAM_BUILD_INFO_LOG
@ PI_PROGRAM_BUILD_INFO_LOG
Definition: pi.h:232
pi_int32
int32_t pi_int32
Definition: pi.h:193
sycl::_V1::detail::getBinImageState
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
Definition: program_manager.cpp:1554
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
sycl::_V1::detail::KernelProgramCache::tryToGetKernelFast
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
Definition: kernel_program_cache.hpp:182
_pi_device
Definition: pi_cuda.hpp:48
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
sycl::_V1::detail::PluginPtr
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
pi_device_binary_struct::EntriesBegin
_pi_offload_entry EntriesBegin
the offload entry table
Definition: pi.h:995
sycl::_V1::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:486