DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/config.hpp>
10 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
13 #include <detail/event_impl.hpp>
16 #include <detail/platform_impl.hpp>
17 #include <detail/program_impl.hpp>
19 #include <detail/queue_impl.hpp>
21 #include <sycl/aspects.hpp>
22 #include <sycl/backend_types.hpp>
23 #include <sycl/context.hpp>
24 #include <sycl/detail/common.hpp>
26 #include <sycl/detail/os_util.hpp>
28 #include <sycl/detail/util.hpp>
29 #include <sycl/device.hpp>
30 #include <sycl/exception.hpp>
31 
33 
34 #include <algorithm>
35 #include <cassert>
36 #include <cstdint>
37 #include <cstdlib>
38 #include <cstring>
39 #include <fstream>
40 #include <memory>
41 #include <mutex>
42 #include <sstream>
43 #include <string>
44 #include <variant>
45 
46 namespace sycl {
47 inline namespace _V1 {
48 namespace detail {
49 
50 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
51 
52 static constexpr int DbgProgMgr = 0;
53 
54 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
55 
58 static void
60  const PluginPtr &Plugin) {
62  constexpr char SpecValue = 1;
64  Prog, ITTSpecConstId, sizeof(char), &SpecValue);
65  }
66 }
67 
70 }
71 
73 createBinaryProgram(const ContextImplPtr Context, const device &Device,
74  const unsigned char *Data, size_t DataLen,
75  const std::vector<pi_device_binary_property> Metadata) {
76  const PluginPtr &Plugin = Context->getPlugin();
77 #ifndef _NDEBUG
78  pi_uint32 NumDevices = 0;
79  Plugin->call<PiApiKind::piContextGetInfo>(Context->getHandleRef(),
81  sizeof(NumDevices), &NumDevices,
82  /*param_value_size_ret=*/nullptr);
83  assert(NumDevices > 0 &&
84  "Only a single device is supported for AOT compilation");
85 #endif
86 
89  getSyclObjImpl(Device)->getHandleRef();
90  pi_int32 BinaryStatus = CL_SUCCESS;
92  Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data,
93  Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
94 
95  if (BinaryStatus != CL_SUCCESS) {
96  throw runtime_error("Creating program with binary failed.", BinaryStatus);
97  }
98 
99  return Program;
100 }
101 
103 createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data,
104  size_t DataLen) {
105  sycl::detail::pi::PiProgram Program = nullptr;
106  const PluginPtr &Plugin = Context->getPlugin();
107  Plugin->call<PiApiKind::piProgramCreate>(Context->getHandleRef(), Data,
108  DataLen, &Program);
109  return Program;
110 }
111 
112 // TODO replace this with a new PI API function
113 static bool
116  // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported.
117  if (Format != PI_DEVICE_BINARY_TYPE_SPIRV)
118  return true;
119 
120  const backend ContextBackend = detail::getSyclObjImpl(C)->getBackend();
121 
122  // The CUDA backend cannot use SPIR-V
123  if (ContextBackend == backend::ext_oneapi_cuda)
124  return false;
125 
126  std::vector<device> Devices = C.get_devices();
127 
128  // Program type is SPIR-V, so we need a device compiler to do JIT.
129  for (const device &D : Devices) {
130  if (!D.get_info<info::device::is_compiler_available>())
131  return false;
132  }
133 
134  // OpenCL 2.1 and greater require clCreateProgramWithIL
135  if (ContextBackend == backend::opencl) {
136  std::string ver = C.get_platform().get_info<info::platform::version>();
137  if (ver.find("OpenCL 1.0") == std::string::npos &&
138  ver.find("OpenCL 1.1") == std::string::npos &&
139  ver.find("OpenCL 1.2") == std::string::npos &&
140  ver.find("OpenCL 2.0") == std::string::npos)
141  return true;
142  }
143 
144  for (const device &D : Devices) {
145  // We need cl_khr_il_program extension to be present
146  // and we can call clCreateProgramWithILKHR using the extension
147  std::vector<std::string> Extensions =
148  D.get_info<info::device::extensions>();
149  if (Extensions.end() ==
150  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
151  return false;
152  }
153 
154  return true;
155 }
156 
158  switch (Format) {
160  return "none";
162  return "native";
164  return "SPIR-V";
166  return "LLVM IR";
167  }
168  assert(false && "Unknown device image format");
169  return "unknown";
170 }
171 
174  const context &Context, const device &Device) {
175  if (DbgProgMgr > 0)
176  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
177  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
178  << ")\n";
179  const pi_device_binary_struct &RawImg = Img.getRawData();
180 
181  // perform minimal sanity checks on the device image and the descriptor
182  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
183  throw runtime_error("Malformed device program image descriptor",
184  PI_ERROR_INVALID_VALUE);
185  }
186  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
187  throw runtime_error("Invalid device program image: size is zero",
188  PI_ERROR_INVALID_VALUE);
189  }
190  size_t ImgSize = Img.getSize();
191 
192  // TODO if the binary image is a part of the fat binary, the clang
193  // driver should have set proper format option to the
194  // clang-offload-wrapper. The fix depends on AOT compilation
195  // implementation, so will be implemented together with it.
196  // Img->Format can't be updated as it is inside of the in-memory
197  // OS module binary.
199 
200  if (Format == PI_DEVICE_BINARY_TYPE_NONE)
201  Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
202  // sycl::detail::pi::PiDeviceBinaryType Format = Img->Format;
203  // assert(Format != PI_DEVICE_BINARY_TYPE_NONE && "Image format not set");
204 
205  if (!isDeviceBinaryTypeSupported(Context, Format))
206  throw sycl::exception(
207  sycl::errc::feature_not_supported,
208  "SPIR-V online compilation is not supported in this context");
209 
210  // Get program metadata from properties
211  auto ProgMetadata = Img.getProgramMetadata();
212  std::vector<pi_device_binary_property> ProgMetadataVector{
213  ProgMetadata.begin(), ProgMetadata.end()};
214 
215  // Load the image
216  const ContextImplPtr Ctx = getSyclObjImpl(Context);
219  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
220  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize,
221  ProgMetadataVector);
222 
223  {
224  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
225  // associate the PI program with the image it was created for
226  NativePrograms[Res] = &Img;
227  }
228 
229  Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
230 
231  if (DbgProgMgr > 1)
232  std::cerr << "created program: " << Res
233  << "; image format: " << getFormatStr(Format) << "\n";
234 
235  return Res;
236 }
237 
238 static void appendLinkOptionsFromImage(std::string &LinkOpts,
239  const RTDeviceBinaryImage &Img) {
240  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
241  // Update only if link options are not overwritten by environment variable
242  if (!LinkOptsEnv) {
243  const char *TemporaryStr = Img.getLinkOptions();
244  if (TemporaryStr != nullptr) {
245  if (!LinkOpts.empty())
246  LinkOpts += " ";
247  LinkOpts += std::string(TemporaryStr);
248  }
249  }
250 }
251 
253  const char *PropName) {
254  pi_device_binary_property Prop = Img.getProperty(PropName);
255  return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
256 }
257 
258 static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img,
259  const char *PropName) {
260  pi_device_binary_property Prop = Img.getProperty(PropName);
261  std::stringstream ss;
262  if (!Prop)
263  return "";
264  int optLevel = DeviceBinaryProperty(Prop).asUint32();
265  if (optLevel < 0 || optLevel > 3)
266  return "";
267  ss << "-O" << optLevel;
268  std::string temp = ss.str();
269  return temp;
270 }
271 
272 static void
274  const RTDeviceBinaryImage &Img,
275  bool IsEsimdImage) {
276  // TODO: sycl-register-alloc-mode is deprecated and should be removed in the
277  // next ABI break.
278  pi_device_binary_property RegAllocModeProp =
279  Img.getProperty("sycl-register-alloc-mode");
280  pi_device_binary_property GRFSizeProp = Img.getProperty("sycl-grf-size");
281 
282  if (!RegAllocModeProp && !GRFSizeProp)
283  return;
284  // The mutual exclusivity of these properties should have been checked in
285  // sycl-post-link.
286  assert(!RegAllocModeProp || !GRFSizeProp);
287  bool Is256GRF = false;
288  bool IsAutoGRF = false;
289  if (RegAllocModeProp) {
290  uint32_t RegAllocModePropVal =
291  DeviceBinaryProperty(RegAllocModeProp).asUint32();
292  Is256GRF = RegAllocModePropVal ==
293  static_cast<uint32_t>(register_alloc_mode_enum::large);
294  IsAutoGRF = RegAllocModePropVal ==
295  static_cast<uint32_t>(register_alloc_mode_enum::automatic);
296  } else {
297  assert(GRFSizeProp);
298  uint32_t GRFSizePropVal = DeviceBinaryProperty(GRFSizeProp).asUint32();
299  Is256GRF = GRFSizePropVal == 256;
300  IsAutoGRF = GRFSizePropVal == 0;
301  }
302  if (Is256GRF) {
303  if (!CompileOpts.empty())
304  CompileOpts += " ";
305  // This option works for both LO AND OCL backends.
306  CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file";
307  }
308  if (IsAutoGRF) {
309  if (!CompileOpts.empty())
310  CompileOpts += " ";
311  // This option works for both LO AND OCL backends.
312  CompileOpts += "-ze-intel-enable-auto-large-GRF-mode";
313  }
314 }
315 
316 static void appendCompileOptionsFromImage(std::string &CompileOpts,
317  const RTDeviceBinaryImage &Img,
318  const std::vector<device> &Devs,
319  const PluginPtr &) {
320  // Build options are overridden if environment variables are present.
321  // Environment variables are not changed during program lifecycle so it
322  // is reasonable to use static here to read them only once.
323  static const char *CompileOptsEnv =
325  // Update only if compile options are not overwritten by environment
326  // variable
327  if (!CompileOptsEnv) {
328  if (!CompileOpts.empty())
329  CompileOpts += " ";
330  const char *TemporaryStr = Img.getCompileOptions();
331  if (TemporaryStr != nullptr)
332  CompileOpts += std::string(TemporaryStr);
333  }
334  bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
335  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
336  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
337  if (isEsimdImage) {
338  if (!CompileOpts.empty())
339  CompileOpts += " ";
340  CompileOpts += "-vc-codegen";
341  // Allow warning and performance hints from vc/finalizer if the RT warning
342  // level is at least 1.
344  CompileOpts += " -disable-finalizer-msg";
345  }
346 
347  appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage);
348 
349  const auto &PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform());
350 
351  // Add optimization flags.
352  auto str = getUint32PropAsOptStr(Img, "optLevel");
353  const char *optLevelStr = str.c_str();
354  // TODO: Passing these options to vector compiler causes build failure in
355  // backend. Will pass the flags once backend compilation issue is resolved.
356  // Update only if compile options are not overwritten by environment
357  // variable.
358  if (!isEsimdImage && !CompileOptsEnv && optLevelStr != nullptr &&
359  optLevelStr[0] != '\0') {
360  // Making sure all devices have the same platform.
361  assert(!Devs.empty() &&
362  std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
363  return Dev.get_platform() == Devs[0].get_platform();
364  }));
365  const char *backend_option = nullptr;
366  // Empty string is returned in backend_option when no appropriate backend
367  // option is available for a given frontend option.
368  PlatformImpl->getBackendOption(optLevelStr, &backend_option);
369  if (backend_option && backend_option[0] != '\0') {
370  if (!CompileOpts.empty())
371  CompileOpts += " ";
372  CompileOpts += std::string(backend_option);
373  }
374  }
375  bool IsIntelGPU =
376  (PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
377  PlatformImpl->getBackend() == backend::opencl) &&
378  std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) {
379  return Dev.is_gpu() &&
380  Dev.get_info<info::device::vendor_id>() == 0x8086;
381  });
382  if (!CompileOptsEnv) {
383  static const char *TargetCompileFast = "-ftarget-compile-fast";
384  if (auto Pos = CompileOpts.find(TargetCompileFast);
385  Pos != std::string::npos) {
386  const char *BackendOption = nullptr;
387  if (IsIntelGPU)
388  PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
389  auto OptLen = strlen(TargetCompileFast);
390  if (IsIntelGPU && BackendOption && BackendOption[0] != '\0')
391  CompileOpts.replace(Pos, OptLen, BackendOption);
392  else
393  CompileOpts.erase(Pos, OptLen);
394  }
395  static const std::string TargetRegisterAllocMode =
396  "-ftarget-register-alloc-mode=";
397  auto OptPos = CompileOpts.find(TargetRegisterAllocMode);
398  while (OptPos != std::string::npos) {
399  auto EndOfOpt = CompileOpts.find(" ", OptPos);
400  // Extract everything after the equals until the end of the option
401  auto OptValue = CompileOpts.substr(
402  OptPos + TargetRegisterAllocMode.size(),
403  EndOfOpt - OptPos - TargetRegisterAllocMode.size());
404  auto ColonPos = OptValue.find(":");
405  auto Device = OptValue.substr(0, ColonPos);
406  std::string BackendStrToAdd;
407  bool IsPVC =
408  std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
409  return IsIntelGPU &&
410  (Dev.get_info<ext::intel::info::device::device_id>() &
411  0xFF00) == 0x0B00;
412  });
413  // Currently 'pvc' is the only supported device.
414  if (Device == "pvc" && IsPVC)
415  BackendStrToAdd = " " + OptValue.substr(ColonPos + 1) + " ";
416 
417  // Extract everything before this option
418  std::string NewCompileOpts =
419  CompileOpts.substr(0, OptPos) + BackendStrToAdd;
420  // Extract everything after this option and add it to the above.
421  if (EndOfOpt != std::string::npos)
422  NewCompileOpts += CompileOpts.substr(EndOfOpt);
423  CompileOpts = NewCompileOpts;
424  OptPos = CompileOpts.find(TargetRegisterAllocMode);
425  }
426  }
427 }
428 
429 static void
431  static const char *AppendCompileOptsEnv =
433  if (AppendCompileOptsEnv) {
434  if (!CompileOpts.empty())
435  CompileOpts += " ";
436  CompileOpts += AppendCompileOptsEnv;
437  }
438 }
439 static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts) {
440  static const char *AppendLinkOptsEnv =
442  if (AppendLinkOptsEnv) {
443  if (!LinkOpts.empty())
444  LinkOpts += " ";
445  LinkOpts += AppendLinkOptsEnv;
446  }
447 }
448 
449 static void applyOptionsFromImage(std::string &CompileOpts,
450  std::string &LinkOpts,
451  const RTDeviceBinaryImage &Img,
452  const std::vector<device> &Devices,
453  const PluginPtr &Plugin) {
454  appendCompileOptionsFromImage(CompileOpts, Img, Devices, Plugin);
455  appendLinkOptionsFromImage(LinkOpts, Img);
456 }
457 
458 static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
459  // Environment variables are not changed during program lifecycle so it
460  // is reasonable to use static here to read them only once.
461  static const char *CompileOptsEnv =
463  if (CompileOptsEnv) {
464  CompileOpts = CompileOptsEnv;
465  }
466 }
467 
468 static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
469  // Environment variables are not changed during program lifecycle so it
470  // is reasonable to use static here to read them only once.
471  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
472  if (LinkOptsEnv) {
473  LinkOpts = LinkOptsEnv;
474  }
475 }
476 
477 static void applyOptionsFromEnvironment(std::string &CompileOpts,
478  std::string &LinkOpts) {
479  // Build options are overridden if environment variables are present.
482 }
483 
484 std::pair<sycl::detail::pi::PiProgram, bool>
486  const context &Context,
487  const device &Device,
488  const std::string &CompileAndLinkOptions,
489  SerializedObj SpecConsts) {
490  sycl::detail::pi::PiProgram NativePrg;
491 
493  Device, Img, SpecConsts, CompileAndLinkOptions);
494  if (BinProg.size()) {
495  // Get program metadata from properties
496  auto ProgMetadata = Img.getProgramMetadata();
497  std::vector<pi_device_binary_property> ProgMetadataVector{
498  ProgMetadata.begin(), ProgMetadata.end()};
499 
500  // TODO: Build for multiple devices once supported by program manager
501  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
502  (const unsigned char *)BinProg[0].data(),
503  BinProg[0].size(), ProgMetadataVector);
504  } else {
505  NativePrg = createPIProgram(Img, Context, Device);
506  }
507  return {NativePrg, BinProg.size()};
508 }
509 
512 static void emitBuiltProgramInfo(const pi_program &Prog,
513  const ContextImplPtr &Context) {
515  std::string ProgramBuildLog =
516  ProgramManager::getProgramBuildLog(Prog, Context);
517  std::clog << ProgramBuildLog << std::endl;
518  }
519 }
520 
521 // When caching is enabled, the returned PiProgram will already have
522 // its ref count incremented.
524  const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl,
525  const std::string &KernelName, const NDRDescT &NDRDesc,
526  bool JITCompilationIsRequired) {
527  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
528 
529  std::string CompileOpts;
530  std::string LinkOpts;
531 
532  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
533 
534  SerializedObj SpecConsts;
535 
536  // Check if we can optimize program builds for sub-devices by using a program
537  // built for the root device
538  DeviceImplPtr RootDevImpl = DeviceImpl;
539  while (!RootDevImpl->isRootDevice()) {
540  auto ParentDev = detail::getSyclObjImpl(
541  RootDevImpl->get_info<info::device::parent_device>());
542  // Sharing is allowed within a single context only
543  if (!ContextImpl->hasDevice(ParentDev))
544  break;
545  RootDevImpl = ParentDev;
546  }
547 
548  pi_bool MustBuildOnSubdevice = PI_TRUE;
549  ContextImpl->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
550  RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
551  sizeof(pi_bool), &MustBuildOnSubdevice, nullptr);
552 
553  DeviceImplPtr Dev =
554  (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl;
555  auto Context = createSyclObjFromImpl<context>(ContextImpl);
556  auto Device = createSyclObjFromImpl<device>(Dev);
557  const RTDeviceBinaryImage &Img =
558  getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
559 
560  // Check that device supports all aspects used by the kernel
561  if (auto exception = checkDevSupportDeviceRequirements(Device, Img, NDRDesc))
562  throw *exception;
563 
564  auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, &CompileOpts,
565  &LinkOpts, SpecConsts] {
566  const PluginPtr &Plugin = ContextImpl->getPlugin();
567  applyOptionsFromImage(CompileOpts, LinkOpts, Img, {Device}, Plugin);
568  // Should always come last!
571  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
572  Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
573 
574  if (!DeviceCodeWasInCache) {
575  if (Img.supportsSpecConstants())
576  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
577  }
578 
579  ProgramPtr ProgramManaged(
580  NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
581 
582  // Link a fallback implementation of device libraries if they are not
583  // supported by a device compiler.
584  // Pre-compiled programs (after AOT compilation or read from persitent
585  // cache) are supposed to be already linked.
586  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
587  // no fallback device library will be linked.
588  uint32_t DeviceLibReqMask = 0;
589  if (!DeviceCodeWasInCache &&
592  DeviceLibReqMask = getDeviceLibReqMask(Img);
593 
594  ProgramPtr BuiltProgram =
595  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
596  getRawSyclObjImpl(Device)->getHandleRef(), DeviceLibReqMask);
597 
598  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
599 
600  {
601  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
602  NativePrograms[BuiltProgram.get()] = &Img;
603  }
604 
605  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
606 
607  // Save program to persistent cache if it is not there
608  if (!DeviceCodeWasInCache)
610  Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
611  return BuiltProgram.release();
612  };
613 
614  uint32_t ImgId = Img.getImageID();
615  const sycl::detail::pi::PiDevice PiDevice = Dev->getHandleRef();
616  auto CacheKey =
617  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), PiDevice);
618 
619  auto GetCachedBuildF = [&Cache, &CacheKey]() {
620  return Cache.getOrInsertProgram(CacheKey);
621  };
622 
624  return BuildF();
625 
626  auto BuildResult =
627  Cache.getOrBuild<compile_program_error>(GetCachedBuildF, BuildF);
628  // getOrBuild is not supposed to return nullptr
629  assert(BuildResult != nullptr && "Invalid build result");
630 
631  // If caching is enabled, one copy of the program handle will be
632  // stored in the cache, and one handle is returned to the
633  // caller. In that case, we need to increase the ref count of the
634  // program.
635  ContextImpl->getPlugin()->call<PiApiKind::piProgramRetain>(BuildResult->Val);
636  return BuildResult->Val;
637 }
638 
639 // When caching is enabled, the returned PiProgram and PiKernel will
640 // already have their ref count incremented.
641 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *,
644  const DeviceImplPtr &DeviceImpl,
645  const std::string &KernelName,
646  const NDRDescT &NDRDesc) {
647  if (DbgProgMgr > 0) {
648  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
649  << ", " << DeviceImpl.get() << ", " << KernelName << ")\n";
650  }
651 
652  using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT;
653 
654  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
655 
656  std::string CompileOpts, LinkOpts;
657  SerializedObj SpecConsts;
658  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
659  // Should always come last!
662  const sycl::detail::pi::PiDevice PiDevice = DeviceImpl->getHandleRef();
663 
664  auto key = std::make_tuple(std::move(SpecConsts), PiDevice,
665  CompileOpts + LinkOpts, KernelName);
667  auto ret_tuple = Cache.tryToGetKernelFast(key);
668  constexpr size_t Kernel = 0; // see KernelFastCacheValT tuple
669  constexpr size_t Program = 3; // see KernelFastCacheValT tuple
670  if (std::get<Kernel>(ret_tuple)) {
671  // Pulling a copy of a kernel and program from the cache,
672  // so we need to retain those resources.
673  ContextImpl->getPlugin()->call<PiApiKind::piKernelRetain>(
674  std::get<Kernel>(ret_tuple));
675  ContextImpl->getPlugin()->call<PiApiKind::piProgramRetain>(
676  std::get<Program>(ret_tuple));
677  return ret_tuple;
678  }
679  }
680 
682  getBuiltPIProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc);
683 
684  auto BuildF = [this, &Program, &KernelName, &ContextImpl] {
685  sycl::detail::pi::PiKernel Kernel = nullptr;
686 
687  const PluginPtr &Plugin = ContextImpl->getPlugin();
689  Program, KernelName.c_str(), &Kernel);
690 
691  // Only set PI_USM_INDIRECT_ACCESS if the platform can handle it.
692  if (ContextImpl->getPlatformImpl()->supports_usm()) {
693  // Some PI Plugins (like OpenCL) require this call to enable USM
694  // For others, PI will turn this into a NOP.
695  Plugin->call<PiApiKind::piKernelSetExecInfo>(
696  Kernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE);
697  }
698 
699  const KernelArgMask *ArgMask = nullptr;
700  if (!m_UseSpvFile)
701  ArgMask = getEliminatedKernelArgMask(Program, KernelName);
702  return std::make_pair(Kernel, ArgMask);
703  };
704 
705  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
706  return Cache.getOrInsertKernel(Program, KernelName);
707  };
708 
710  // The built kernel cannot be shared between multiple
711  // threads when caching is disabled, so we can return
712  // nullptr for the mutex.
713  auto [Kernel, ArgMask] = BuildF();
714  return make_tuple(Kernel, nullptr, ArgMask, Program);
715  }
716 
717  auto BuildResult =
718  Cache.getOrBuild<invalid_object_error>(GetCachedBuildF, BuildF);
719  // getOrBuild is not supposed to return nullptr
720  assert(BuildResult != nullptr && "Invalid build result");
721  const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val;
722  auto ret_val = std::make_tuple(KernelArgMaskPair.first,
723  &(BuildResult->MBuildResultMutex),
724  KernelArgMaskPair.second, Program);
725  // If caching is enabled, one copy of the kernel handle will be
726  // stored in the cache, and one handle is returned to the
727  // caller. In that case, we need to increase the ref count of the
728  // kernel.
729  ContextImpl->getPlugin()->call<PiApiKind::piKernelRetain>(
730  KernelArgMaskPair.first);
731  Cache.saveKernel(key, ret_val);
732  return ret_val;
733 }
734 
737  const ContextImplPtr Context) {
739  const PluginPtr &Plugin = Context->getPlugin();
742  &Program, nullptr);
743  return Program;
744 }
745 
746 std::string
748  const ContextImplPtr Context) {
749  size_t PIDevicesSize = 0;
750  const PluginPtr &Plugin = Context->getPlugin();
751  Plugin->call<PiApiKind::piProgramGetInfo>(Program, PI_PROGRAM_INFO_DEVICES, 0,
752  nullptr, &PIDevicesSize);
753  std::vector<sycl::detail::pi::PiDevice> PIDevices(
754  PIDevicesSize / sizeof(sycl::detail::pi::PiDevice));
756  PIDevicesSize, PIDevices.data(),
757  nullptr);
758  std::string Log = "The program was built for " +
759  std::to_string(PIDevices.size()) + " devices";
760  for (sycl::detail::pi::PiDevice &Device : PIDevices) {
761  std::string DeviceBuildInfoString;
762  size_t DeviceBuildInfoStrSize = 0;
763  Plugin->call<PiApiKind::piProgramGetBuildInfo>(
764  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, 0, nullptr,
765  &DeviceBuildInfoStrSize);
766  if (DeviceBuildInfoStrSize > 0) {
767  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
768  Plugin->call<PiApiKind::piProgramGetBuildInfo>(
769  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
770  DeviceBuildInfo.data(), nullptr);
771  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
772  }
773 
774  std::string DeviceNameString;
775  size_t DeviceNameStrSize = 0;
776  Plugin->call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_NAME, 0,
777  nullptr, &DeviceNameStrSize);
778  if (DeviceNameStrSize > 0) {
779  std::vector<char> DeviceName(DeviceNameStrSize);
780  Plugin->call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_NAME,
781  DeviceNameStrSize,
782  DeviceName.data(), nullptr);
783  DeviceNameString = std::string(DeviceName.data());
784  }
785  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
786  DeviceBuildInfoString;
787  }
788  return Log;
789 }
790 
791 // TODO device libraries may use scpecialization constants, manifest files, etc.
792 // To support that they need to be delivered in a different container - so that
793 // pi_device_binary_struct can be created for each of them.
794 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
796  std::string LibSyclDir = OSUtil::getCurrentDSODir();
797  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
798  std::ifstream::in | std::ifstream::binary);
799  if (!File.good()) {
800  return false;
801  }
802 
803  File.seekg(0, std::ios::end);
804  size_t FileSize = File.tellg();
805  File.seekg(0, std::ios::beg);
806  std::vector<char> FileContent(FileSize);
807  File.read(&FileContent[0], FileSize);
808  File.close();
809 
810  Prog =
811  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
812  return Prog != nullptr;
813 }
814 
815 // For each extension, a pair of library names. The first uses native support,
816 // the second emulates functionality in software.
817 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
820  {nullptr, "libsycl-fallback-cassert.spv"}},
822  {nullptr, "libsycl-fallback-cmath.spv"}},
824  {nullptr, "libsycl-fallback-cmath-fp64.spv"}},
826  {nullptr, "libsycl-fallback-complex.spv"}},
828  {nullptr, "libsycl-fallback-complex-fp64.spv"}},
830  {nullptr, "libsycl-fallback-cstring.spv"}},
832  {nullptr, "libsycl-fallback-imf.spv"}},
834  {nullptr, "libsycl-fallback-imf-fp64.spv"}},
836  {nullptr, "libsycl-fallback-imf-bf16.spv"}},
838  {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}};
839 
840 static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) {
841  auto LibPair = DeviceLibNames.find(Extension);
842  const char *Lib = nullptr;
843  if (LibPair != DeviceLibNames.end())
844  Lib = Native ? LibPair->second.first : LibPair->second.second;
845  if (Lib == nullptr)
846  throw compile_program_error("Unhandled (new?) device library extension",
847  PI_ERROR_INVALID_OPERATION);
848  return Lib;
849 }
850 
851 // For each extension understood by the SYCL runtime, the string representation
852 // of its name. Names with devicelib in them are internal to the runtime. Others
853 // are actual OpenCL extensions.
854 static const std::map<DeviceLibExt, const char *> DeviceLibExtensionStrs = {
855  {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"},
856  {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"},
858  "cl_intel_devicelib_math_fp64"},
859  {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"},
861  "cl_intel_devicelib_complex_fp64"},
862  {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"},
863  {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"},
864  {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"},
865  {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"},
867  "cl_intel_bfloat16_conversions"}};
868 
869 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
870  auto Ext = DeviceLibExtensionStrs.find(Extension);
871  if (Ext == DeviceLibExtensionStrs.end())
872  throw compile_program_error("Unhandled (new?) device library extension",
873  PI_ERROR_INVALID_OPERATION);
874  return Ext->second;
875 }
876 
879  const sycl::detail::pi::PiDevice &Device,
880  bool UseNativeLib) {
881 
882  auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib);
883 
884  auto LockedCache = Context->acquireCachedLibPrograms();
885  auto CachedLibPrograms = LockedCache.get();
886  auto CacheResult = CachedLibPrograms.emplace(
887  std::make_pair(std::make_pair(Extension, Device), nullptr));
888  bool Cached = !CacheResult.second;
889  auto LibProgIt = CacheResult.first;
890  sycl::detail::pi::PiProgram &LibProg = LibProgIt->second;
891 
892  if (Cached)
893  return LibProg;
894 
895  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
896  CachedLibPrograms.erase(LibProgIt);
897  throw compile_program_error(std::string("Failed to load ") + LibFileName,
898  PI_ERROR_INVALID_VALUE);
899  }
900 
901  const PluginPtr &Plugin = Context->getPlugin();
902  // TODO no spec constants are used in the std libraries, support in the future
904  Plugin->call_nocheck<PiApiKind::piProgramCompile>(
905  LibProg,
906  /*num devices = */ 1, &Device,
907  // Do not use compile options for library programs: it is not clear
908  // if user options (image options) are supposed to be applied to
909  // library program as well, and what actually happens to a SPIR-V
910  // program if we apply them.
911  "", 0, nullptr, nullptr, nullptr, nullptr);
912  if (Error != PI_SUCCESS) {
913  CachedLibPrograms.erase(LibProgIt);
914  throw compile_program_error(
915  ProgramManager::getProgramBuildLog(LibProg, Context), Error);
916  }
917 
918  return LibProg;
919 }
920 
921 ProgramManager::ProgramManager() : m_AsanFoundInImage(false) {
922  const char *SpvFile = std::getenv(UseSpvEnv);
923  // If a SPIR-V file is specified with an environment variable,
924  // register the corresponding image
925  if (SpvFile) {
926  m_UseSpvFile = true;
927  // The env var requests that the program is loaded from a SPIR-V file on
928  // disk
929  std::ifstream File(SpvFile, std::ios::binary);
930 
931  if (!File.is_open())
932  throw runtime_error(std::string("Can't open file specified via ") +
933  UseSpvEnv + ": " + SpvFile,
934  PI_ERROR_INVALID_VALUE);
935  File.seekg(0, std::ios::end);
936  size_t Size = File.tellg();
937  std::unique_ptr<char[]> Data(new char[Size]);
938  File.seekg(0);
939  File.read(Data.get(), Size);
940  File.close();
941  if (!File.good())
942  throw runtime_error(std::string("read from ") + SpvFile +
943  std::string(" failed"),
944  PI_ERROR_INVALID_VALUE);
945  // No need for a mutex here since all access to these private fields is
946  // blocked until the construction of the ProgramManager singleton is
947  // finished.
948  m_SpvFileImage =
949  std::make_unique<DynRTDeviceBinaryImage>(std::move(Data), Size);
950 
951  if (DbgProgMgr > 0) {
952  std::cerr << "loaded device image binary from " << SpvFile << "\n";
953  std::cerr << "format: " << getFormatStr(m_SpvFileImage->getFormat())
954  << "\n";
955  }
956  }
957 }
958 
960  bool JITCompilationIsRequired) {
961  if (!JITCompilationIsRequired)
962  return;
963  // If the image is already compiled with AOT, throw an exception.
964  const pi_device_binary_struct &RawImg = Image->getRawData();
965  if ((strcmp(RawImg.DeviceTargetSpec,
967  (strcmp(RawImg.DeviceTargetSpec,
969  (strcmp(RawImg.DeviceTargetSpec,
971  throw sycl::exception(sycl::errc::feature_not_supported,
972  "Recompiling AOT image is not supported");
973  }
974 }
975 
976 template <typename StorageKey>
978  const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
979  const StorageKey &Key, const context &Context, const device &Device) {
980  auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
981  if (ItBegin == ItEnd)
982  return nullptr;
983 
984  std::vector<pi_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
985  auto It = ItBegin;
986  for (unsigned I = 0; It != ItEnd; ++It, ++I)
987  RawImgs[I] = const_cast<pi_device_binary>(&It->second->getRawData());
988 
989  pi_uint32 ImgInd = 0;
990  // Ask the native runtime under the given context to choose the device image
991  // it prefers.
992  getSyclObjImpl(Context)
993  ->getPlugin()
995  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
996  (pi_uint32)RawImgs.size(), &ImgInd);
997  std::advance(ItBegin, ImgInd);
998  return ItBegin->second;
999 }
1000 
1001 RTDeviceBinaryImage &
1002 ProgramManager::getDeviceImage(const std::string &KernelName,
1003  const context &Context, const device &Device,
1004  bool JITCompilationIsRequired) {
1005  if (DbgProgMgr > 0) {
1006  std::cerr << ">>> ProgramManager::getDeviceImage(\"" << KernelName << "\", "
1007  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
1008  << ", " << JITCompilationIsRequired << ")\n";
1009 
1010  std::cerr << "available device images:\n";
1012  }
1013 
1014  if (m_UseSpvFile) {
1015  assert(m_SpvFileImage);
1016  return getDeviceImage(
1017  std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1018  Context, Device, JITCompilationIsRequired);
1019  }
1020 
1021  RTDeviceBinaryImage *Img = nullptr;
1022  {
1023  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1024  if (auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1025  KernelId != m_KernelName2KernelIDs.end()) {
1026  // Kernel ID presence guarantees that we have bin image in the storage.
1027  Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, KernelId->second,
1028  Context, Device);
1029  assert(Img && "No binary image found for kernel id");
1030  } else {
1031  Img = getBinImageFromMultiMap(m_ServiceKernels, KernelName, Context,
1032  Device);
1033  }
1034  }
1035  if (Img) {
1036  CheckJITCompilationForImage(Img, JITCompilationIsRequired);
1037 
1038  if (DbgProgMgr > 0) {
1039  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
1040  Img->print();
1041  }
1042  return *Img;
1043  }
1044 
1045  throw runtime_error("No kernel named " + KernelName + " was found",
1046  PI_ERROR_INVALID_KERNEL_NAME);
1047 }
1048 
1050  const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1051  const context &Context, const device &Device,
1052  bool JITCompilationIsRequired) {
1053  assert(ImageSet.size() > 0);
1054 
1055  if (DbgProgMgr > 0) {
1056  std::cerr << ">>> ProgramManager::getDeviceImage(Custom SPV file "
1057  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
1058  << ", " << JITCompilationIsRequired << ")\n";
1059 
1060  std::cerr << "available device images:\n";
1062  }
1063 
1064  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1065  std::vector<pi_device_binary> RawImgs(ImageSet.size());
1066  auto ImageIterator = ImageSet.begin();
1067  for (size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1068  RawImgs[i] = const_cast<pi_device_binary>(&(*ImageIterator)->getRawData());
1069  pi_uint32 ImgInd = 0;
1070  // Ask the native runtime under the given context to choose the device image
1071  // it prefers.
1072  getSyclObjImpl(Context)
1073  ->getPlugin()
1075  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
1076  (pi_uint32)RawImgs.size(), &ImgInd);
1077 
1078  ImageIterator = ImageSet.begin();
1079  std::advance(ImageIterator, ImgInd);
1080 
1081  CheckJITCompilationForImage(*ImageIterator, JITCompilationIsRequired);
1082 
1083  if (DbgProgMgr > 0) {
1084  std::cerr << "selected device image: " << &(*ImageIterator)->getRawData()
1085  << "\n";
1086  (*ImageIterator)->print();
1087  }
1088  return **ImageIterator;
1089 }
1090 
1091 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
1092  uint32_t Mask =
1093  0x1 << (static_cast<uint32_t>(Ext) -
1094  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
1095  return ((DeviceLibReqMask & Mask) == Mask);
1096 }
1097 
1098 static std::vector<sycl::detail::pi::PiProgram>
1100  const sycl::detail::pi::PiDevice &Device,
1101  uint32_t DeviceLibReqMask) {
1102  std::vector<sycl::detail::pi::PiProgram> Programs;
1103 
1104  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1106  /* is fallback loaded? */ false},
1116 
1117  // Disable all devicelib extensions requiring fp64 support if at least
1118  // one underlying device doesn't support cl_khr_fp64.
1119  std::string DevExtList =
1120  Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1122  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
1123 
1124  // Load a fallback library for an extension if the device does not
1125  // support it.
1126  for (auto &Pair : RequiredDeviceLibExt) {
1127  DeviceLibExt Ext = Pair.first;
1128  bool &FallbackIsLoaded = Pair.second;
1129 
1130  if (FallbackIsLoaded) {
1131  continue;
1132  }
1133 
1134  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
1135  continue;
1136  }
1137 
1141  !fp64Support) {
1142  continue;
1143  }
1144 
1145  auto ExtName = getDeviceLibExtensionStr(Ext);
1146 
1147  bool InhibitNativeImpl = false;
1148  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1149  InhibitNativeImpl = strstr(Env, ExtName) != nullptr;
1150  }
1151 
1152  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1153  if (!DeviceSupports || InhibitNativeImpl) {
1154  Programs.push_back(
1155  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/false));
1156  FallbackIsLoaded = true;
1157  } else {
1158  // bfloat16 needs native library if device supports it
1160  Programs.push_back(
1161  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/true));
1162  FallbackIsLoaded = true;
1163  }
1164  }
1165  }
1166  return Programs;
1167 }
1168 
1169 ProgramManager::ProgramPtr ProgramManager::build(
1170  ProgramPtr Program, const ContextImplPtr Context,
1171  const std::string &CompileOptions, const std::string &LinkOptions,
1172  const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask) {
1173 
1174  if (DbgProgMgr > 0) {
1175  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
1176  << CompileOptions << ", " << LinkOptions << ", ... " << Device
1177  << ")\n";
1178  }
1179 
1180  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1181 
1182  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
1183  // We do not link with other device libraries, because it may fail
1184  // due to unrecognized SPIR-V format of those libraries.
1185  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
1186  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
1187  LinkDeviceLibs = false;
1188 
1189  std::vector<sycl::detail::pi::PiProgram> LinkPrograms;
1190  if (LinkDeviceLibs) {
1191  LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask);
1192  }
1193 
1194  static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK");
1195  static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1');
1196 
1197  const PluginPtr &Plugin = Context->getPlugin();
1198  if (LinkPrograms.empty() && !ForceLink) {
1199  const std::string &Options = LinkOptions.empty()
1200  ? CompileOptions
1201  : (CompileOptions + " " + LinkOptions);
1203  Plugin->call_nocheck<PiApiKind::piProgramBuild>(
1204  Program.get(), /*num devices =*/1, &Device, Options.c_str(),
1205  nullptr, nullptr);
1206  if (Error != PI_SUCCESS)
1207  throw compile_program_error(getProgramBuildLog(Program.get(), Context),
1208  Error);
1209  return Program;
1210  }
1211 
1212  // Include the main program and compile/link everything together
1213  Plugin->call<PiApiKind::piProgramCompile>(Program.get(), /*num devices =*/1,
1214  &Device, CompileOptions.c_str(), 0,
1215  nullptr, nullptr, nullptr, nullptr);
1216  LinkPrograms.push_back(Program.get());
1217 
1218  sycl::detail::pi::PiProgram LinkedProg = nullptr;
1219  auto doLink = [&] {
1220  return Plugin->call_nocheck<PiApiKind::piProgramLink>(
1221  Context->getHandleRef(), /*num devices =*/1, &Device,
1222  LinkOptions.c_str(), LinkPrograms.size(), LinkPrograms.data(), nullptr,
1223  nullptr, &LinkedProg);
1224  };
1225  sycl::detail::pi::PiResult Error = doLink();
1226  if (Error == PI_ERROR_OUT_OF_RESOURCES) {
1227  Context->getKernelProgramCache().reset();
1228  Error = doLink();
1229  }
1230 
1231  // Link program call returns a new program object if all parameters are valid,
1232  // or NULL otherwise. Release the original (user) program.
1233  Program.reset(LinkedProg);
1234  if (Error != PI_SUCCESS) {
1235  if (LinkedProg) {
1236  // A non-trivial error occurred during linkage: get a build log, release
1237  // an incomplete (but valid) LinkedProg, and throw.
1238  throw compile_program_error(getProgramBuildLog(LinkedProg, Context),
1239  Error);
1240  }
1241  Plugin->checkPiResult(Error);
1242  }
1243  return Program;
1244 }
1245 
1246 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1247  const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1248  Img.getAssertUsed();
1249  if (AssertUsedRange.isAvailable())
1250  for (const auto &Prop : AssertUsedRange)
1251  m_KernelUsesAssert.insert(Prop->Name);
1252 }
1253 
1254 bool ProgramManager::kernelUsesAssert(const std::string &KernelName) const {
1255  return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1256 }
1257 
1259  const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1260  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1261  pi_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1262  const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
1263  const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
1264  // Treat the image as empty one
1265  if (EntriesB == EntriesE)
1266  continue;
1267 
1268  auto Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
1269  static uint32_t SequenceID = 0;
1270 
1271  // Fill the kernel argument mask map
1272  const RTDeviceBinaryImage::PropertyRange &KPOIRange =
1273  Img->getKernelParamOptInfo();
1274  if (KPOIRange.isAvailable()) {
1275  KernelNameToArgMaskMap &ArgMaskMap =
1276  m_EliminatedKernelArgMasks[Img.get()];
1277  for (const auto &Info : KPOIRange)
1278  ArgMaskMap[Info->Name] =
1279  createKernelArgMask(DeviceBinaryProperty(Info).asByteArray());
1280  }
1281 
1282  // Fill maps for kernel bundles
1283  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1284 
1285  // Register all exported symbols
1286  auto ExportedSymbols = Img->getExportedSymbols();
1287  for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
1288  m_ExportedSymbols.insert(ExportedSymbol->Name);
1289 
1290  if (DumpImages) {
1291  const bool NeedsSequenceID = std::any_of(
1292  m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1293  [&](auto &CurrentImg) {
1294  return CurrentImg.first->getFormat() == Img->getFormat();
1295  });
1296  dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1297  }
1298 
1299  m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
1300 
1301  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1302  ++EntriesIt) {
1303 
1304  // Skip creating unique kernel ID if it is a service kernel.
1305  // SYCL service kernels are identified by having
1306  // __sycl_service_kernel__ in the mangled name, primarily as part of
1307  // the namespace of the name type.
1308  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1309  m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1310  continue;
1311  }
1312 
1313  // Skip creating unique kernel ID if it is an exported device
1314  // function. Exported device functions appear in the offload entries
1315  // among kernels, but are identifiable by being listed in properties.
1316  if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1317  continue;
1318 
1319  // ... and create a unique kernel ID for the entry
1320  auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1321  if (It == m_KernelName2KernelIDs.end()) {
1322  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1323  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1324  sycl::kernel_id KernelID =
1325  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1326 
1327  It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1328  }
1329  m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1330  m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1331  }
1332 
1333  cacheKernelUsesAssertInfo(*Img);
1334 
1335  // check if kernel uses asan
1336  {
1337  pi_device_binary_property Prop = Img->getProperty("asanUsed");
1338  m_AsanFoundInImage |=
1339  Prop && (detail::DeviceBinaryProperty(Prop).asUint32() != 0);
1340  }
1341 
1342  // Sort kernel ids for faster search
1343  std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1344  m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
1345 
1346  // ... and initialize associated device_global information
1347  {
1348  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1349 
1350  auto DeviceGlobals = Img->getDeviceGlobals();
1351  for (const pi_device_binary_property &DeviceGlobal : DeviceGlobals) {
1352  ByteArray DeviceGlobalInfo =
1353  DeviceBinaryProperty(DeviceGlobal).asByteArray();
1354 
1355  // The supplied device_global info property is expected to contain:
1356  // * 8 bytes - Size of the property.
1357  // * 4 bytes - Size of the underlying type in the device_global.
1358  // * 4 bytes - 0 if device_global has device_image_scope and any value
1359  // otherwise.
1360  DeviceGlobalInfo.dropBytes(8);
1361  auto [TypeSize, DeviceImageScopeDecorated] =
1362  DeviceGlobalInfo.consume<std::uint32_t, std::uint32_t>();
1363  assert(DeviceGlobalInfo.empty() && "Extra data left!");
1364 
1365  // Give the image pointer as an identifier for the image the
1366  // device-global is associated with.
1367 
1368  auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1369  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1370  // If it has already been registered we update the information.
1371  ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1372  DeviceImageScopeDecorated);
1373  } else {
1374  // If it has not already been registered we create a new entry.
1375  // Note: Pointer to the device global is not available here, so it
1376  // cannot be set until registration happens.
1377  auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1378  DeviceGlobal->Name, Img.get(), TypeSize,
1379  DeviceImageScopeDecorated);
1380  m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1381  }
1382  }
1383  }
1384  // ... and initialize associated host_pipe information
1385  {
1386  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1387  auto HostPipes = Img->getHostPipes();
1388  for (const pi_device_binary_property &HostPipe : HostPipes) {
1389  ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray();
1390 
1391  // The supplied host_pipe info property is expected to contain:
1392  // * 8 bytes - Size of the property.
1393  // * 4 bytes - Size of the underlying type in the host_pipe.
1394  // Note: Property may be padded.
1395 
1396  HostPipeInfo.dropBytes(8);
1397  auto TypeSize = HostPipeInfo.consume<std::uint32_t>();
1398  assert(HostPipeInfo.empty() && "Extra data left!");
1399 
1400  auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1401  if (ExistingHostPipe != m_HostPipes.end()) {
1402  // If it has already been registered we update the information.
1403  ExistingHostPipe->second->initialize(TypeSize);
1404  ExistingHostPipe->second->initialize(Img.get());
1405  } else {
1406  // If it has not already been registered we create a new entry.
1407  // Note: Pointer to the host pipe is not available here, so it
1408  // cannot be set until registration happens.
1409  auto EntryUPtr =
1410  std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1411  EntryUPtr->initialize(Img.get());
1412  m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1413  }
1414  }
1415  }
1416  m_DeviceImages.insert(std::move(Img));
1417  }
1418 }
1419 
1421  for (const auto &ImgIt : m_BinImg2KernelIDs) {
1422  ImgIt.first->print();
1423  }
1424 }
1425 
1426 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img,
1427  uint32_t SequenceID) const {
1428  const char *Prefix = std::getenv("SYCL_DUMP_IMAGES_PREFIX");
1429  std::string Fname(Prefix ? Prefix : "sycl_");
1430  const pi_device_binary_struct &RawImg = Img.getRawData();
1431  Fname += RawImg.DeviceTargetSpec;
1432  if (SequenceID)
1433  Fname += '_' + std::to_string(SequenceID);
1434  std::string Ext;
1435 
1437  if (Format == PI_DEVICE_BINARY_TYPE_SPIRV)
1438  Ext = ".spv";
1439  else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1440  Ext = ".bc";
1441  else
1442  Ext = ".bin";
1443  Fname += Ext;
1444 
1445  std::ofstream F(Fname, std::ios::binary);
1446 
1447  if (!F.is_open()) {
1448  throw runtime_error("Can not write " + Fname, PI_ERROR_UNKNOWN);
1449  }
1450  Img.dump(F);
1451  F.close();
1452 }
1453 
1455  sycl::detail::pi::PiProgram NativePrg,
1456  const RTDeviceBinaryImage *Img) {
1457  if (DbgProgMgr > 2) {
1458  std::cerr << ">>> ProgramManager::flushSpecConstants(" << Prg.get()
1459  << ",...)\n";
1460  }
1461  if (!Prg.hasSetSpecConstants())
1462  return; // nothing to do
1463  pi::PiProgram PrgHandle = Prg.getHandleRef();
1464  // program_impl can't correspond to two different native programs
1465  assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1466  NativePrg = NativePrg ? NativePrg : PrgHandle;
1467 
1468  if (!Img) {
1469  // caller hasn't provided the image object - find it
1470  { // make sure NativePrograms map access is synchronized
1471  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1472  auto It = NativePrograms.find(NativePrg);
1473  if (It == NativePrograms.end())
1474  throw sycl::exception(
1475  sycl::errc::invalid,
1476  "spec constant is set in a program w/o a binary image");
1477  Img = It->second;
1478  }
1479  if (!Img->supportsSpecConstants()) {
1480  if (DbgProgMgr > 0)
1481  std::cerr << ">>> ProgramManager::flushSpecConstants: binary image "
1482  << &Img->getRawData() << " doesn't support spec constants\n";
1483  // This device binary image does not support runtime setting of
1484  // specialization constants; compiler must have generated default values.
1485  // NOTE: Can't throw here, as it would always take place with AOT
1486  //-compiled code. New Khronos 2020 spec should fix this inconsistency.
1487  return;
1488  }
1489  }
1490  Prg.flush_spec_constants(*Img, NativePrg);
1491 }
1492 
1494  const RTDeviceBinaryImage::PropertyRange &DLMRange =
1495  Img.getDeviceLibReqMask();
1496  if (DLMRange.isAvailable())
1497  return DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1498  else
1499  return 0x0;
1500 }
1501 
1502 const KernelArgMask *
1504  const std::string &KernelName) {
1505  // Bail out if there are no eliminated kernel arg masks in our images
1506  if (m_EliminatedKernelArgMasks.empty())
1507  return nullptr;
1508 
1509  {
1510  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1511  auto ImgIt = NativePrograms.find(NativePrg);
1512  if (ImgIt != NativePrograms.end()) {
1513  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1514  if (MapIt != m_EliminatedKernelArgMasks.end()) {
1515  auto ArgMaskMapIt = MapIt->second.find(KernelName);
1516  if (ArgMaskMapIt != MapIt->second.end())
1517  return &MapIt->second[KernelName];
1518  }
1519  return nullptr;
1520  }
1521  }
1522 
1523  // If the program was not cached iterate over all available images looking for
1524  // the requested kernel
1525  for (auto &Elem : m_EliminatedKernelArgMasks) {
1526  auto ArgMask = Elem.second.find(KernelName);
1527  if (ArgMask != Elem.second.end())
1528  return &ArgMask->second;
1529  }
1530 
1531  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1532  return nullptr;
1533 }
1534 
1536  auto IsAOTBinary = [](const char *Format) {
1537  return (
1538  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1539  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1540  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1541  };
1542 
1543  // There are only two initial states so far - SPIRV which needs to be compiled
1544  // and linked and fully compiled(AOTed) binary
1545 
1546  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1547 
1548  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1549 }
1550 
1552  const device &Dev) {
1553  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1555  auto &Plugin = DeviceImpl->getPlugin();
1556 
1557  const sycl::detail::pi::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1558 
1559  // Call piextDeviceSelectBinary with only one image to check if an image is
1560  // compatible with implementation. The function returns invalid index if no
1561  // device images are compatible.
1562  pi_uint32 SuitableImageID = std::numeric_limits<pi_uint32>::max();
1563  pi_device_binary DevBin =
1564  const_cast<pi_device_binary>(&BinImage->getRawData());
1566  Plugin->call_nocheck<PiApiKind::piextDeviceSelectBinary>(
1567  PIDeviceHandle, &DevBin,
1568  /*num bin images = */ (pi_uint32)1, &SuitableImageID);
1569  if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1570  throw runtime_error("Invalid binary image or device",
1571  PI_ERROR_INVALID_VALUE);
1572 
1573  return (0 == SuitableImageID);
1574 }
1575 
1576 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1577  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1578 
1579  auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1580  if (KernelID == m_KernelName2KernelIDs.end())
1581  throw runtime_error("No kernel found with the specified name",
1582  PI_ERROR_INVALID_KERNEL_NAME);
1583 
1584  return KernelID->second;
1585 }
1586 
1588  std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1589 
1590  return std::any_of(
1591  m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1592  [&](std::pair<RTDeviceBinaryImage *,
1593  std::shared_ptr<std::vector<kernel_id>>>
1594  Elem) { return compatibleWithDevice(Elem.first, Dev); });
1595 }
1596 
1597 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1598  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1599 
1600  std::vector<sycl::kernel_id> AllKernelIDs;
1601  AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1602  for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1603  AllKernelIDs.push_back(KernelID.second);
1604  }
1605  return AllKernelIDs;
1606 }
1607 
1608 kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
1609  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1610 
1611  auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1612  if (KernelID == m_BuiltInKernelIDs.end()) {
1613  auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1614  auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1615  KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1616  }
1617 
1618  return KernelID->second;
1619 }
1620 
1621 void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
1622  const char *UniqueId) {
1623  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1624 
1625  auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1626  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1627  // Update the existing information and add the entry to the pointer map.
1628  ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1629  m_Ptr2DeviceGlobal.insert(
1630  {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1631  return;
1632  }
1633 
1634  auto EntryUPtr =
1635  std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1636  auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1637  m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1638 }
1639 
1640 std::set<RTDeviceBinaryImage *>
1641 ProgramManager::getRawDeviceImages(const std::vector<kernel_id> &KernelIDs) {
1642  std::set<RTDeviceBinaryImage *> BinImages;
1643  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1644  for (const kernel_id &KID : KernelIDs) {
1645  auto Range = m_KernelIDs2BinImage.equal_range(KID);
1646  for (auto It = Range.first, End = Range.second; It != End; ++It)
1647  BinImages.insert(It->second);
1648  }
1649  return BinImages;
1650 }
1651 
1653 ProgramManager::getDeviceGlobalEntry(const void *DeviceGlobalPtr) {
1654  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1655  auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1656  assert(Entry != m_Ptr2DeviceGlobal.end() && "Device global entry not found");
1657  return Entry->second;
1658 }
1659 
1660 std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
1661  const std::vector<std::string> &UniqueIds,
1662  bool ExcludeDeviceImageScopeDecorated) {
1663  std::vector<DeviceGlobalMapEntry *> FoundEntries;
1664  FoundEntries.reserve(UniqueIds.size());
1665 
1666  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1667  for (const std::string &UniqueId : UniqueIds) {
1668  auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1669  assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1670  "Device global not found in map.");
1671  if (!ExcludeDeviceImageScopeDecorated ||
1672  !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1673  FoundEntries.push_back(DeviceGlobalEntry->second.get());
1674  }
1675  return FoundEntries;
1676 }
1677 
1678 void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr,
1679  const char *UniqueId) {
1680  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1681 
1682  auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1683  if (ExistingHostPipe != m_HostPipes.end()) {
1684  ExistingHostPipe->second->initialize(HostPipePtr);
1685  m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1686  return;
1687  }
1688 
1689  auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1690  auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1691  m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1692 }
1693 
1695 ProgramManager::getHostPipeEntry(const std::string &UniqueId) {
1696  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1697  auto Entry = m_HostPipes.find(UniqueId);
1698  assert(Entry != m_HostPipes.end() && "Host pipe entry not found");
1699  return Entry->second.get();
1700 }
1701 
1703  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1704  auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1705  assert(Entry != m_Ptr2HostPipe.end() && "Host pipe entry not found");
1706  return Entry->second;
1707 }
1708 
1710  RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) {
1711  const bundle_state ImgState = getBinImageState(BinImage);
1712 
1713  assert(compatibleWithDevice(BinImage, Dev));
1714 
1715  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1716  // Collect kernel names for the image.
1717  {
1718  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1719  KernelIDs = m_BinImg2KernelIDs[BinImage];
1720  }
1721 
1722  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1723  BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1724  /*PIProgram=*/nullptr);
1725 
1726  return createSyclObjFromImpl<device_image_plain>(Impl);
1727 }
1728 
1729 std::vector<device_image_plain>
1731  const context &Ctx, const std::vector<device> &Devs,
1732  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {
1733 
1734  // Collect unique raw device images taking into account kernel ids passed
1735  // TODO: Can we avoid repacking?
1736  std::set<RTDeviceBinaryImage *> BinImages;
1737  if (!KernelIDs.empty()) {
1738  for (const auto &KID : KernelIDs) {
1739  bool isCompatibleWithAtLeastOneDev =
1740  std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) {
1741  return sycl::is_compatible({KID}, Dev);
1742  });
1743  if (!isCompatibleWithAtLeastOneDev)
1744  throw sycl::exception(
1746  "Kernel is incompatible with all devices in devs");
1747  }
1748  BinImages = getRawDeviceImages(KernelIDs);
1749  } else {
1750  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1751  for (auto &ImageUPtr : m_BinImg2KernelIDs) {
1752  BinImages.insert(ImageUPtr.first);
1753  }
1754  }
1755 
1756  // Ignore images with incompatible state. Image is considered compatible
1757  // with a target state if an image is already in the target state or can
1758  // be brought to target state by compiling/linking/building.
1759  //
1760  // Example: an image in "executable" state is not compatible with
1761  // "input" target state - there is no operation to convert the image it
1762  // to "input" state. An image in "input" state is compatible with
1763  // "executable" target state because it can be built to get into
1764  // "executable" state.
1765  for (auto It = BinImages.begin(); It != BinImages.end();) {
1766  if (getBinImageState(*It) > TargetState)
1767  It = BinImages.erase(It);
1768  else
1769  ++It;
1770  }
1771 
1772  std::vector<device_image_plain> SYCLDeviceImages;
1773 
1774  // If a non-input state is requested, we can filter out some compatible
1775  // images and return only those with the highest compatible state for each
1776  // device-kernel pair. This map tracks how many kernel-device pairs need each
1777  // image, so that any unneeded ones are skipped.
1778  // TODO this has no effect if the requested state is input, consider having
1779  // a separate branch for that case to avoid unnecessary tracking work.
1780  struct DeviceBinaryImageInfo {
1781  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1783  int RequirementCounter = 0;
1784  };
1785  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1786 
1787  for (const sycl::device &Dev : Devs) {
1788  // Track the highest image state for each requested kernel.
1789  using StateImagesPairT =
1790  std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1791  using KernelImageMapT =
1792  std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1793  KernelImageMapT KernelImageMap;
1794  if (!KernelIDs.empty())
1795  for (const kernel_id &KernelID : KernelIDs)
1796  KernelImageMap.insert({KernelID, {}});
1797 
1798  for (RTDeviceBinaryImage *BinImage : BinImages) {
1799  if (!compatibleWithDevice(BinImage, Dev) ||
1800  !doesDevSupportDeviceRequirements(Dev, *BinImage))
1801  continue;
1802 
1803  auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1804  DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1805  if (InsertRes.second) {
1806  ImgInfo.State = getBinImageState(BinImage);
1807  // Collect kernel names for the image
1808  {
1809  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1810  ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1811  }
1812  }
1813  const bundle_state ImgState = ImgInfo.State;
1814  const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1815  ImgInfo.KernelIDs;
1816  int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1817 
1818  // If the image does not contain any non-service kernels we can skip it.
1819  if (!ImageKernelIDs || ImageKernelIDs->empty())
1820  continue;
1821 
1822  // Update tracked information.
1823  for (kernel_id &KernelID : *ImageKernelIDs) {
1824  StateImagesPairT *StateImagesPair;
1825  // If only specific kernels are requested, ignore the rest.
1826  if (!KernelIDs.empty()) {
1827  auto It = KernelImageMap.find(KernelID);
1828  if (It == KernelImageMap.end())
1829  continue;
1830  StateImagesPair = &It->second;
1831  } else
1832  StateImagesPair = &KernelImageMap[KernelID];
1833 
1834  auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1835 
1836  if (KernelImages.empty()) {
1837  KernelImagesState = ImgState;
1838  KernelImages.push_back(BinImage);
1839  ++ImgRequirementCounter;
1840  } else if (KernelImagesState < ImgState) {
1841  for (RTDeviceBinaryImage *Img : KernelImages) {
1842  auto It = ImageInfoMap.find(Img);
1843  assert(It != ImageInfoMap.end());
1844  assert(It->second.RequirementCounter > 0);
1845  --(It->second.RequirementCounter);
1846  }
1847  KernelImages.clear();
1848  KernelImages.push_back(BinImage);
1849  KernelImagesState = ImgState;
1850  ++ImgRequirementCounter;
1851  } else if (KernelImagesState == ImgState) {
1852  KernelImages.push_back(BinImage);
1853  ++ImgRequirementCounter;
1854  }
1855  }
1856  }
1857  }
1858 
1859  for (const auto &ImgInfoPair : ImageInfoMap) {
1860  if (ImgInfoPair.second.RequirementCounter == 0)
1861  continue;
1862 
1863  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1864  ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1865  ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
1866 
1867  SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1868  }
1869 
1870  return SYCLDeviceImages;
1871 }
1872 
1873 void ProgramManager::bringSYCLDeviceImagesToState(
1874  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
1875 
1876  for (device_image_plain &DevImage : DeviceImages) {
1877  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
1878 
1879  // At this time, there is no circumstance where a device image should ever
1880  // be in the source state. That not good.
1881  assert(DevImageState != bundle_state::ext_oneapi_source);
1882 
1883  switch (TargetState) {
1885  // This case added for switch statement completion. We should not be here.
1886  assert(DevImageState == bundle_state::ext_oneapi_source);
1887  break;
1888  case bundle_state::input:
1889  // Do nothing since there is no state which can be upgraded to the input.
1890  assert(DevImageState == bundle_state::input);
1891  break;
1892  case bundle_state::object:
1893  if (DevImageState == bundle_state::input) {
1894  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1895  /*PropList=*/{});
1896  break;
1897  }
1898  // Device image is expected to be object state then.
1899  assert(DevImageState == bundle_state::object);
1900  break;
1901  case bundle_state::executable: {
1902  switch (DevImageState) {
1904  // This case added for switch statement completion.
1905  // We should not be here.
1906  assert(DevImageState != bundle_state::ext_oneapi_source);
1907  break;
1908  case bundle_state::input:
1909  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1910  /*PropList=*/{});
1911  break;
1912  case bundle_state::object: {
1913  std::vector<device_image_plain> LinkedDevImages =
1914  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
1915  /*PropList=*/{});
1916  // Since only one device image is passed here one output device image is
1917  // expected
1918  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
1919  DevImage = LinkedDevImages[0];
1920  break;
1921  }
1923  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1924  /*PropList=*/{});
1925  break;
1926  }
1927  break;
1928  }
1929  }
1930  }
1931 }
1932 
1933 std::vector<device_image_plain>
1934 ProgramManager::getSYCLDeviceImages(const context &Ctx,
1935  const std::vector<device> &Devs,
1936  bundle_state TargetState) {
1937  // Collect device images with compatible state
1938  std::vector<device_image_plain> DeviceImages =
1939  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1940  // Bring device images with compatible state to desired state.
1941  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1942  return DeviceImages;
1943 }
1944 
1945 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1946  const context &Ctx, const std::vector<device> &Devs,
1947  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
1948  // Collect device images with compatible state
1949  std::vector<device_image_plain> DeviceImages =
1950  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1951 
1952  // Filter out images that are rejected by Selector
1953  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1954  [&Selector](const device_image_plain &Image) {
1955  return !Selector(getSyclObjImpl(Image));
1956  });
1957  DeviceImages.erase(It, DeviceImages.end());
1958 
1959  // The spec says that the function should not call online compiler or linker
1960  // to translate device images into target state
1961  return DeviceImages;
1962 }
1963 
1964 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1965  const context &Ctx, const std::vector<device> &Devs,
1966  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1967  // Fast path for when no kernel IDs are requested
1968  if (KernelIDs.empty())
1969  return {};
1970 
1971  {
1972  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1973 
1974  for (auto &It : m_BuiltInKernelIDs) {
1975  if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1976  KernelIDs.end())
1978  "Attempting to use a built-in kernel. They are "
1979  "not fully supported");
1980  }
1981  }
1982 
1983  // Collect device images with compatible state
1984  std::vector<device_image_plain> DeviceImages =
1985  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1986 
1987  // Bring device images with compatible state to desired state.
1988  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1989  return DeviceImages;
1990 }
1991 
1992 static void
1993 setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
1995  const PluginPtr &Plugin) {
1996  // Set ITT annotation specialization constant if needed.
1997  enableITTAnnotationsIfNeeded(Prog, Plugin);
1998 
1999  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
2000  const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
2001  &SpecConstData = InputImpl->get_spec_const_data_ref();
2002  const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
2003 
2004  // Set all specialization IDs from descriptors in the input device image.
2005  for (const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
2006  std::ignore = SpecConstNames;
2007  for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) {
2008  if (SpecIDDesc.IsSet) {
2010  Prog, SpecIDDesc.ID, SpecIDDesc.Size,
2011  SpecConsts.data() + SpecIDDesc.BlobOffset);
2012  }
2013  }
2014  }
2015 }
2016 
2017 device_image_plain
2019  const std::vector<device> &Devs,
2020  const property_list &) {
2021 
2022  // TODO: Extract compile options from property list once the Spec clarifies
2023  // how they can be passed.
2024 
2025  // TODO: Probably we could have cached compiled device images.
2026  const std::shared_ptr<device_image_impl> &InputImpl =
2027  getSyclObjImpl(DeviceImage);
2028 
2029  const PluginPtr &Plugin =
2030  getSyclObjImpl(InputImpl->get_context())->getPlugin();
2031 
2032  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2033  if (InputImpl->get_bin_image_ref()->getFormat() !=
2035  Devs.size() > 1)
2036  sycl::runtime_error(
2037  "Creating a program from AOT binary for multiple device is not "
2038  "supported",
2039  PI_ERROR_INVALID_OPERATION);
2040 
2041  // Device is not used when creating program from SPIRV, so passing only one
2042  // device is OK.
2043  sycl::detail::pi::PiProgram Prog = createPIProgram(
2044  *InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs[0]);
2045 
2046  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2047  setSpecializationConstants(InputImpl, Prog, Plugin);
2048 
2049  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
2050  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2051  bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
2052  InputImpl->get_spec_const_data_ref(),
2053  InputImpl->get_spec_const_blob_ref());
2054 
2055  std::vector<pi_device> PIDevices;
2056  PIDevices.reserve(Devs.size());
2057  for (const device &Dev : Devs)
2058  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2059 
2060  // TODO: Handle zero sized Device list.
2061  std::string CompileOptions;
2062  applyCompileOptionsFromEnvironment(CompileOptions);
2064  CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2065  // Should always come last!
2068  Plugin->call_nocheck<PiApiKind::piProgramCompile>(
2069  ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(),
2070  PIDevices.data(), CompileOptions.c_str(),
2071  /*num_input_headers=*/0, /*input_headers=*/nullptr,
2072  /*header_include_names=*/nullptr,
2073  /*pfn_notify=*/nullptr, /*user_data*/ nullptr);
2074  if (Error != PI_SUCCESS)
2075  throw sycl::exception(
2077  getProgramBuildLog(ObjectImpl->get_program_ref(),
2078  getSyclObjImpl(ObjectImpl->get_context())));
2079 
2080  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2081 }
2082 
2083 std::vector<device_image_plain>
2085  const std::vector<device> &Devs,
2086  const property_list &PropList) {
2087  (void)PropList;
2088 
2089  std::vector<pi_program> PIPrograms;
2090  PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref());
2091 
2092  std::vector<pi_device> PIDevices;
2093  PIDevices.reserve(Devs.size());
2094  for (const device &Dev : Devs)
2095  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2096 
2097  std::string LinkOptionsStr;
2098  applyLinkOptionsFromEnvironment(LinkOptionsStr);
2099  if (LinkOptionsStr.empty()) {
2100  const std::shared_ptr<device_image_impl> &InputImpl =
2101  getSyclObjImpl(DeviceImage);
2102  appendLinkOptionsFromImage(LinkOptionsStr,
2103  *(InputImpl->get_bin_image_ref()));
2104  }
2105  // Should always come last!
2107  const context &Context = getSyclObjImpl(DeviceImage)->get_context();
2108  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2109  const PluginPtr &Plugin = ContextImpl->getPlugin();
2110 
2111  sycl::detail::pi::PiProgram LinkedProg = nullptr;
2112  auto doLink = [&] {
2113  return Plugin->call_nocheck<PiApiKind::piProgramLink>(
2114  ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2115  /*options=*/LinkOptionsStr.c_str(), PIPrograms.size(),
2116  PIPrograms.data(),
2117  /*pfn_notify=*/nullptr,
2118  /*user_data=*/nullptr, &LinkedProg);
2119  };
2120  sycl::detail::pi::PiResult Error = doLink();
2121  if (Error == PI_ERROR_OUT_OF_RESOURCES) {
2122  ContextImpl->getKernelProgramCache().reset();
2123  Error = doLink();
2124  }
2125 
2126  if (Error != PI_SUCCESS) {
2127  if (LinkedProg) {
2128  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2129  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
2130  }
2131  Plugin->reportPiError(Error, "link()");
2132  }
2133 
2134  std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
2135  std::vector<unsigned char> NewSpecConstBlob;
2136  device_image_impl::SpecConstMapT NewSpecConstMap;
2137 
2138  std::shared_ptr<device_image_impl> DeviceImageImpl =
2139  getSyclObjImpl(DeviceImage);
2140 
2141  // Duplicates are not expected here, otherwise piProgramLink should fail
2142  KernelIDs->insert(KernelIDs->end(),
2143  DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2144  DeviceImageImpl->get_kernel_ids_ptr()->end());
2145 
2146  // To be able to answer queries about specialziation constants, the new
2147  // device image should have the specialization constants from all the linked
2148  // images.
2149  {
2150  const std::lock_guard<std::mutex> SpecConstLock(
2151  DeviceImageImpl->get_spec_const_data_lock());
2152 
2153  // Copy all map entries to the new map. Since the blob will be copied to
2154  // the end of the new blob we need to move the blob offset of each entry.
2155  for (const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2156  std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2157  NewSpecConstMap[SpecConstIt.first];
2158  assert(NewDescEntries.empty() &&
2159  "Specialization constant already exists in the map.");
2160  NewDescEntries.reserve(SpecConstIt.second.size());
2161  for (const device_image_impl::SpecConstDescT &SpecConstDesc :
2162  SpecConstIt.second) {
2163  device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc;
2164  NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size();
2165  NewDescEntries.push_back(std::move(NewSpecConstDesc));
2166  }
2167  }
2168 
2169  // Copy the blob from the device image into the new blob. This moves the
2170  // offsets of the following blobs.
2171  NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2172  DeviceImageImpl->get_spec_const_blob_ref().begin(),
2173  DeviceImageImpl->get_spec_const_blob_ref().end());
2174  }
2175 
2176  // device_image_impl expects kernel ids to be sorted for fast search
2177  std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});
2178 
2179  auto BinImg = getSyclObjImpl(DeviceImage)->get_bin_image_ref();
2180  DeviceImageImplPtr ExecutableImpl =
2181  std::make_shared<detail::device_image_impl>(
2182  BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs),
2183  LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2184 
2185  // TODO: Make multiple sets of device images organized by devices they are
2186  // compiled for.
2187  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2188 }
2189 
2190 // The function duplicates most of the code from existing getBuiltPIProgram.
2191 // The differences are:
2192 // Different API - uses different objects to extract required info
2193 // Supports caching of a program built for multiple devices
2195  const std::vector<device> &Devs,
2196  const property_list &PropList) {
2197  (void)PropList;
2198 
2199  const std::shared_ptr<device_image_impl> &InputImpl =
2200  getSyclObjImpl(DeviceImage);
2201 
2202  const context Context = InputImpl->get_context();
2203 
2204  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2205 
2206  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
2207 
2208  std::string CompileOpts;
2209  std::string LinkOpts;
2210  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
2211 
2212  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
2213  const RTDeviceBinaryImage &Img = *ImgPtr;
2214 
2215  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2216 
2217  // TODO: Unify this code with getBuiltPIProgram
2218  auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2219  &InputImpl, SpecConsts] {
2220  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2221  const PluginPtr &Plugin = ContextImpl->getPlugin();
2222  applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Plugin);
2223  // Should always come last!
2226  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2227  if (InputImpl->get_bin_image_ref()->getFormat() !=
2229  Devs.size() > 1)
2230  sycl::runtime_error(
2231  "Creating a program from AOT binary for multiple device is not "
2232  "supported",
2233  PI_ERROR_INVALID_OPERATION);
2234 
2235  // Device is not used when creating program from SPIRV, so passing only one
2236  // device is OK.
2237  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2238  Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2239 
2240  if (!DeviceCodeWasInCache &&
2241  InputImpl->get_bin_image_ref()->supportsSpecConstants())
2242  setSpecializationConstants(InputImpl, NativePrg, Plugin);
2243 
2244  ProgramPtr ProgramManaged(
2245  NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
2246 
2247  // Link a fallback implementation of device libraries if they are not
2248  // supported by a device compiler.
2249  // Pre-compiled programs are supposed to be already linked.
2250  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
2251  // no fallback device library will be linked.
2252  uint32_t DeviceLibReqMask = 0;
2253  if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
2255  DeviceLibReqMask = getDeviceLibReqMask(Img);
2256 
2257  ProgramPtr BuiltProgram =
2258  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2259  getRawSyclObjImpl(Devs[0])->getHandleRef(), DeviceLibReqMask);
2260 
2261  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
2262 
2263  {
2264  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2265  NativePrograms[BuiltProgram.get()] = &Img;
2266  }
2267 
2268  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2269 
2270  // Save program to persistent cache if it is not there
2271  if (!DeviceCodeWasInCache)
2272  PersistentDeviceCodeCache::putItemToDisc(
2273  Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
2274 
2275  return BuiltProgram.release();
2276  };
2277 
2279  auto ResProgram = BuildF();
2280  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2281  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2282  InputImpl->get_kernel_ids_ptr(), ResProgram,
2283  InputImpl->get_spec_const_data_ref(),
2284  InputImpl->get_spec_const_blob_ref());
2285 
2286  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2287  }
2288 
2289  uint32_t ImgId = Img.getImageID();
2291  getRawSyclObjImpl(Devs[0])->getHandleRef();
2292  auto CacheKey =
2293  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), PiDevice);
2294 
2295  // CacheKey is captured by reference so when we overwrite it later we can
2296  // reuse this function.
2297  auto GetCachedBuildF = [&Cache, &CacheKey]() {
2298  return Cache.getOrInsertProgram(CacheKey);
2299  };
2300 
2301  // TODO: Throw SYCL2020 style exception
2302  auto BuildResult =
2303  Cache.getOrBuild<compile_program_error>(GetCachedBuildF, BuildF);
2304  // getOrBuild is not supposed to return nullptr
2305  assert(BuildResult != nullptr && "Invalid build result");
2306 
2307  sycl::detail::pi::PiProgram ResProgram = BuildResult->Val;
2308 
2309  // Cache supports key with once device only, but here we have multiple
2310  // devices a program is built for, so add the program to the cache for all
2311  // other devices.
2312  const PluginPtr &Plugin = ContextImpl->getPlugin();
2313  auto CacheOtherDevices = [ResProgram, &Plugin]() {
2314  Plugin->call<PiApiKind::piProgramRetain>(ResProgram);
2315  return ResProgram;
2316  };
2317 
2318  // The program for device "0" is already added to the cache during the first
2319  // call to getOrBuild, so starting with "1"
2320  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2321  const sycl::detail::pi::PiDevice PiDeviceAdd =
2322  getRawSyclObjImpl(Devs[Idx])->getHandleRef();
2323 
2324  // Change device in the cache key to reduce copying of spec const data.
2325  CacheKey.second = PiDeviceAdd;
2326  Cache.getOrBuild<compile_program_error>(GetCachedBuildF, CacheOtherDevices);
2327  // getOrBuild is not supposed to return nullptr
2328  assert(BuildResult != nullptr && "Invalid build result");
2329  }
2330 
2331  // devive_image_impl shares ownership of PIProgram with, at least, program
2332  // cache. The ref counter will be descremented in the destructor of
2333  // device_image_impl
2334  Plugin->call<PiApiKind::piProgramRetain>(ResProgram);
2335 
2336  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2337  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2338  InputImpl->get_kernel_ids_ptr(), ResProgram,
2339  InputImpl->get_spec_const_data_ref(),
2340  InputImpl->get_spec_const_blob_ref());
2341 
2342  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2343 }
2344 
2345 // When caching is enabled, the returned PiKernel will already have
2346 // its ref count incremented.
2347 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
2348 ProgramManager::getOrCreateKernel(const context &Context,
2349  const std::string &KernelName,
2350  const property_list &PropList,
2351  sycl::detail::pi::PiProgram Program) {
2352 
2353  (void)PropList;
2354 
2355  const ContextImplPtr Ctx = getSyclObjImpl(Context);
2356 
2357  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
2358 
2359  auto BuildF = [this, &Program, &KernelName, &Ctx] {
2360  sycl::detail::pi::PiKernel Kernel = nullptr;
2361 
2362  const PluginPtr &Plugin = Ctx->getPlugin();
2363  Plugin->call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
2364  &Kernel);
2365 
2366  // Only set PI_USM_INDIRECT_ACCESS if the platform can handle it.
2367  if (Ctx->getPlatformImpl()->supports_usm())
2368  Plugin->call<PiApiKind::piKernelSetExecInfo>(
2369  Kernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE);
2370 
2371  // Ignore possible m_UseSpvFile for now.
2372  // TODO consider making m_UseSpvFile interact with kernel bundles as well.
2373  const KernelArgMask *KernelArgMask =
2374  getEliminatedKernelArgMask(Program, KernelName);
2375 
2376  return std::make_pair(Kernel, KernelArgMask);
2377  };
2378 
2379  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2380  return Cache.getOrInsertKernel(Program, KernelName);
2381  };
2382 
2384  // The built kernel cannot be shared between multiple
2385  // threads when caching is disabled, so we can return
2386  // nullptr for the mutex.
2387  auto [Kernel, ArgMask] = BuildF();
2388  return make_tuple(Kernel, nullptr, ArgMask);
2389  }
2390 
2391  auto BuildResult =
2392  Cache.getOrBuild<invalid_object_error>(GetCachedBuildF, BuildF);
2393  // getOrBuild is not supposed to return nullptr
2394  assert(BuildResult != nullptr && "Invalid build result");
2395  // If caching is enabled, one copy of the kernel handle will be
2396  // stored in the cache, and one handle is returned to the
2397  // caller. In that case, we need to increase the ref count of the
2398  // kernel.
2399  Ctx->getPlugin()->call<PiApiKind::piKernelRetain>(BuildResult->Val.first);
2400  return std::make_tuple(BuildResult->Val.first,
2401  &(BuildResult->MBuildResultMutex),
2402  BuildResult->Val.second);
2403 }
2404 
2406  const RTDeviceBinaryImage &Img) {
2407  return !checkDevSupportDeviceRequirements(Dev, Img).has_value();
2408 }
2409 
2410 static std::string getAspectNameStr(sycl::aspect AspectNum) {
2411 #define __SYCL_ASPECT(ASPECT, ID) \
2412  case aspect::ASPECT: \
2413  return #ASPECT;
2414 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2415 // We don't need "case aspect::usm_allocator" here because it will duplicate
2416 // "case aspect::usm_system_allocations", therefore leave this macro empty
2417 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2418  switch (AspectNum) {
2419 #include <sycl/info/aspects.def>
2420 #include <sycl/info/aspects_deprecated.def>
2421  }
2423  "Unknown aspect " +
2424  std::to_string(static_cast<unsigned>(AspectNum)));
2425 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2426 #undef __SYCL_ASPECT_DEPRECATED
2427 #undef __SYCL_ASPECT
2428 }
2429 
2430 // Check if the multiplication over unsigned integers overflows
2431 template <typename T>
2432 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2434  if (y == 0)
2435  return 0;
2436  if (x > std::numeric_limits<T>::max() / y)
2437  return {};
2438  else
2439  return x * y;
2440 }
2441 
2442 namespace matrix_ext = ext::oneapi::experimental::matrix;
2443 
2444 // Matrix type string to matrix_type enum value conversion
2445 // Note: matrix type strings are defined in template specialization for
2446 // convertTypeToMatrixTypeString above
2447 std::optional<matrix_ext::matrix_type>
2449  const std::string &MatrixTypeString) {
2450  assert(!MatrixTypeString.empty() &&
2451  "MatrixTypeString type string can't be empty. Check if required "
2452  "template specialization for convertTypeToMatrixTypeString exists.");
2453  std::string_view MatrixTypeStringView = MatrixTypeString;
2454  std::string Prefix("matrix_type::");
2455  assert((MatrixTypeStringView.substr(0, Prefix.size()) == Prefix) &&
2456  "MatrixTypeString has incorrect prefix, should be \"matrix_type::\".");
2457  MatrixTypeStringView.remove_prefix(Prefix.size());
2458  if ("bf16" == MatrixTypeStringView)
2459  return matrix_ext::matrix_type::bf16;
2460  else if ("fp16" == MatrixTypeStringView)
2461  return matrix_ext::matrix_type::fp16;
2462  else if ("tf32" == MatrixTypeStringView)
2463  return matrix_ext::matrix_type::tf32;
2464  else if ("fp32" == MatrixTypeStringView)
2465  return matrix_ext::matrix_type::fp32;
2466  else if ("fp64" == MatrixTypeStringView)
2467  return matrix_ext::matrix_type::fp64;
2468  else if ("sint8" == MatrixTypeStringView)
2469  return matrix_ext::matrix_type::sint8;
2470  else if ("sint16" == MatrixTypeStringView)
2471  return matrix_ext::matrix_type::sint16;
2472  else if ("sint32" == MatrixTypeStringView)
2473  return matrix_ext::matrix_type::sint32;
2474  else if ("sint64" == MatrixTypeStringView)
2475  return matrix_ext::matrix_type::sint64;
2476  else if ("uint8" == MatrixTypeStringView)
2477  return matrix_ext::matrix_type::uint8;
2478  else if ("uint16" == MatrixTypeStringView)
2479  return matrix_ext::matrix_type::uint16;
2480  else if ("uint32" == MatrixTypeStringView)
2481  return matrix_ext::matrix_type::uint32;
2482  else if ("uint64" == MatrixTypeStringView)
2483  return matrix_ext::matrix_type::uint64;
2484  return std::nullopt;
2485 }
2486 
2487 bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser,
2488  size_t RowsUser, size_t ColsUser,
2489  matrix_ext::matrix_type MatrixTypeRuntime,
2490  size_t MaxRowsRuntime, size_t MaxColsRuntime,
2491  size_t RowsRuntime, size_t ColsRuntime) {
2492  std::optional<matrix_ext::matrix_type> MatrixTypeUserOpt =
2493  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeStrUser);
2494  if (!MatrixTypeUserOpt)
2495  return false;
2496  bool IsMatrixTypeSupported = (MatrixTypeUserOpt.value() == MatrixTypeRuntime);
2497  bool IsRowsSupported = ((RowsRuntime != 0) ? (RowsUser == RowsRuntime)
2498  : (RowsUser <= MaxRowsRuntime));
2499  bool IsColsSupported = ((ColsRuntime != 0) ? (ColsUser == ColsRuntime)
2500  : (ColsUser <= MaxColsRuntime));
2501  return IsMatrixTypeSupported && IsRowsSupported && IsColsSupported;
2502 }
2503 
2504 std::optional<sycl::exception> checkDevSupportJointMatrix(
2505  const std::string &JointMatrixProStr,
2506  const std::vector<ext::oneapi::experimental::matrix::combination>
2507  &SupportedMatrixCombinations) {
2508  std::istringstream JointMatrixStrStream(JointMatrixProStr);
2509  std::string SingleJointMatrix;
2510 
2511  // start to parse the value which is generated by
2512  // SYCLPropagateJointMatrixUsage pass
2513  while (std::getline(JointMatrixStrStream, SingleJointMatrix, ';')) {
2514  std::istringstream SingleJointMatrixStrStream(SingleJointMatrix);
2515  std::vector<std::string> JointMatrixVec;
2516  std::string Item;
2517 
2518  while (std::getline(SingleJointMatrixStrStream, Item, ',')) {
2519  JointMatrixVec.push_back(Item);
2520  }
2521 
2522  assert(JointMatrixVec.size() == 4 &&
2523  "Property set is corrupted, it must have 4 elements.");
2524 
2525  const std::string &MatrixTypeUser = JointMatrixVec[0];
2526  const std::string &UseStrUser = JointMatrixVec[1];
2527  size_t RowsUser, ColsUser = 0;
2528  try {
2529  RowsUser = std::stoi(JointMatrixVec[2]);
2530  ColsUser = std::stoi(JointMatrixVec[3]);
2531  } catch (std::logic_error &) {
2532  // ignore exceptions, one way or another a user will see sycl::exception
2533  // with the message about incorrect rows or cols, because they are
2534  // initialized with 0 above
2535  }
2536 
2537  bool IsMatrixCompatible = false;
2538 
2539  for (const auto &Combination : SupportedMatrixCombinations) {
2540  std::optional<ext::oneapi::experimental::matrix::use> Use =
2541  detail::convertMatrixUseStringToEnum(UseStrUser.c_str());
2542  assert(Use && "Property set has empty matrix::use value.");
2543  switch (Use.value()) {
2544  case matrix_ext::use::a:
2545  IsMatrixCompatible |= isMatrixSupportedByHW(
2546  MatrixTypeUser, RowsUser, ColsUser, Combination.atype,
2547  Combination.max_msize, Combination.max_ksize, Combination.msize,
2548  Combination.ksize);
2549  break;
2550  case matrix_ext::use::b:
2551  IsMatrixCompatible |= isMatrixSupportedByHW(
2552  MatrixTypeUser, RowsUser, ColsUser, Combination.btype,
2553  Combination.max_ksize, Combination.max_nsize, Combination.ksize,
2554  Combination.nsize);
2555  break;
2556  case matrix_ext::use::accumulator: {
2557  IsMatrixCompatible |= isMatrixSupportedByHW(
2558  MatrixTypeUser, RowsUser, ColsUser, Combination.ctype,
2559  Combination.max_msize, Combination.max_nsize, Combination.msize,
2560  Combination.nsize);
2561  IsMatrixCompatible |= isMatrixSupportedByHW(
2562  MatrixTypeUser, RowsUser, ColsUser, Combination.dtype,
2563  Combination.max_msize, Combination.max_nsize, Combination.msize,
2564  Combination.nsize);
2565  break;
2566  }
2567  }
2568 
2569  // early exit if we have a match
2570  if (IsMatrixCompatible)
2571  break;
2572  }
2573 
2574  if (!IsMatrixCompatible)
2576  "joint_matrix with parameters " + MatrixTypeUser +
2577  ", " + UseStrUser +
2578  ", Rows=" + std::to_string(RowsUser) +
2579  ", Cols=" + std::to_string(ColsUser) +
2580  " is not supported on this device");
2581  }
2582  return std::nullopt;
2583 }
2584 
2585 std::optional<sycl::exception> checkDevSupportJointMatrixMad(
2586  const std::string &JointMatrixProStr,
2587  const std::vector<ext::oneapi::experimental::matrix::combination>
2588  &SupportedMatrixCombinations) {
2589  std::istringstream JointMatrixMadStrStream(JointMatrixProStr);
2590  std::string SingleJointMatrixMad;
2591 
2592  // start to parse the value which is generated by
2593  // SYCLPropagateJointMatrixUsage pass
2594  while (std::getline(JointMatrixMadStrStream, SingleJointMatrixMad, ';')) {
2595  std::istringstream SingleJointMatrixMadStrStream(SingleJointMatrixMad);
2596  std::vector<std::string> JointMatrixMadVec;
2597  std::string Item;
2598 
2599  while (std::getline(SingleJointMatrixMadStrStream, Item, ',')) {
2600  JointMatrixMadVec.push_back(Item);
2601  }
2602 
2603  assert(JointMatrixMadVec.size() == 7 &&
2604  "Property set is corrupted, it must have 7 elements.");
2605 
2606  const std::string &MatrixTypeAStrUser = JointMatrixMadVec[0];
2607  const std::string &MatrixTypeBStrUser = JointMatrixMadVec[1];
2608  const std::string &MatrixTypeCStrUser = JointMatrixMadVec[2];
2609  const std::string &MatrixTypeDStrUser = JointMatrixMadVec[3];
2610  size_t MSizeUser, KSizeUser, NSizeUser = 0;
2611  try {
2612  MSizeUser = std::stoi(JointMatrixMadVec[4]);
2613  KSizeUser = std::stoi(JointMatrixMadVec[5]);
2614  NSizeUser = std::stoi(JointMatrixMadVec[6]);
2615  } catch (std::logic_error &) {
2616  // ignore exceptions, one way or another a user will see sycl::exception
2617  // with the message about incorrect size(s), because they are
2618  // initialized with 0 above
2619  }
2620 
2621  std::optional<matrix_ext::matrix_type> MatrixTypeAUserOpt =
2622  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeAStrUser);
2623  std::optional<matrix_ext::matrix_type> MatrixTypeBUserOpt =
2624  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeBStrUser);
2625  std::optional<matrix_ext::matrix_type> MatrixTypeCUserOpt =
2626  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeCStrUser);
2627  std::optional<matrix_ext::matrix_type> MatrixTypeDUserOpt =
2628  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeDStrUser);
2629 
2630  bool IsMatrixMadCompatible = false;
2631 
2632  for (const auto &Combination : SupportedMatrixCombinations) {
2633  if (!MatrixTypeAUserOpt || !MatrixTypeBUserOpt || !MatrixTypeCUserOpt ||
2634  !MatrixTypeDUserOpt)
2635  continue;
2636 
2637  bool IsMatrixTypeACompatible =
2638  (MatrixTypeAUserOpt.value() == Combination.atype);
2639  bool IsMatrixTypeBCompatible =
2640  (MatrixTypeBUserOpt.value() == Combination.btype);
2641  bool IsMatrixTypeCCompatible =
2642  (MatrixTypeCUserOpt.value() == Combination.ctype);
2643  bool IsMatrixTypeDCompatible =
2644  (MatrixTypeDUserOpt.value() == Combination.dtype);
2645  bool IsMSizeCompatible =
2646  ((Combination.msize != 0) ? (MSizeUser == Combination.msize)
2647  : (MSizeUser <= Combination.max_msize));
2648  bool IsKSizeCompatible =
2649  ((Combination.ksize != 0) ? (KSizeUser == Combination.ksize)
2650  : (KSizeUser <= Combination.max_ksize));
2651  bool IsNSizeCompatible =
2652  ((Combination.nsize != 0) ? (NSizeUser == Combination.nsize)
2653  : (NSizeUser <= Combination.max_nsize));
2654 
2655  IsMatrixMadCompatible =
2656  IsMatrixTypeACompatible && IsMatrixTypeBCompatible &&
2657  IsMatrixTypeCCompatible && IsMatrixTypeDCompatible &&
2658  IsMSizeCompatible && IsKSizeCompatible && IsNSizeCompatible;
2659 
2660  // early exit if we have a match
2661  if (IsMatrixMadCompatible)
2662  break;
2663  }
2664 
2665  if (!IsMatrixMadCompatible)
2666  return sycl::exception(
2668  "joint_matrix_mad function with parameters atype=" +
2669  MatrixTypeAStrUser + ", btype=" + MatrixTypeBStrUser +
2670  ", ctype=" + MatrixTypeCStrUser + ", dtype=" +
2671  MatrixTypeDStrUser + ", M=" + std::to_string(MSizeUser) + ", K=" +
2672  std::to_string(KSizeUser) + ", N=" + std::to_string(NSizeUser) +
2673  " is not supported on this "
2674  "device");
2675  }
2676  return std::nullopt;
2677 }
2678 
2679 std::optional<sycl::exception>
2681  const RTDeviceBinaryImage &Img,
2682  const NDRDescT &NDRDesc) {
2683  auto getPropIt = [&Img](const std::string &PropName) {
2684  const RTDeviceBinaryImage::PropertyRange &PropRange =
2685  Img.getDeviceRequirements();
2687  PropRange.begin(), PropRange.end(),
2689  return (*Prop)->Name == PropName;
2690  });
2691  return (PropIt == PropRange.end())
2692  ? std::nullopt
2693  : std::optional<
2695  };
2696 
2697  auto AspectsPropIt = getPropIt("aspects");
2698  auto JointMatrixPropIt = getPropIt("joint_matrix");
2699  auto JointMatrixMadPropIt = getPropIt("joint_matrix_mad");
2700  auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size");
2701  auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t");
2702  auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size");
2703 
2704  // Checking if device supports defined aspects
2705  if (AspectsPropIt) {
2706  ByteArray Aspects =
2707  DeviceBinaryProperty(*(AspectsPropIt.value())).asByteArray();
2708  // Drop 8 bytes describing the size of the byte array.
2709  Aspects.dropBytes(8);
2710  while (!Aspects.empty()) {
2711  aspect Aspect = Aspects.consume<aspect>();
2712  if (!Dev.has(Aspect))
2714  "Required aspect " + getAspectNameStr(Aspect) +
2715  " is not supported on the device");
2716  }
2717  }
2718 
2719  if (JointMatrixPropIt) {
2720  std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
2721  Dev.get_info<
2722  ext::oneapi::experimental::info::device::matrix_combinations>();
2723 
2724  if (Combinations.empty())
2726  "no matrix hardware on the target device, "
2727  "joint_matrix is not supported");
2728 
2729  ByteArray JointMatrixByteArray =
2730  DeviceBinaryProperty(*(JointMatrixPropIt.value())).asByteArray();
2731  // Drop 8 bytes describing the size of the byte array.
2732  JointMatrixByteArray.dropBytes(8);
2733  std::string JointMatrixByteArrayToStr;
2734  while (!JointMatrixByteArray.empty()) {
2735  JointMatrixByteArrayToStr += JointMatrixByteArray.consume<char>();
2736  }
2737  std::optional<sycl::exception> Result =
2738  checkDevSupportJointMatrix(JointMatrixByteArrayToStr, Combinations);
2739  if (Result)
2740  return Result.value();
2741  }
2742 
2743  if (JointMatrixMadPropIt) {
2744  std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
2745  Dev.get_info<
2746  ext::oneapi::experimental::info::device::matrix_combinations>();
2747 
2748  if (Combinations.empty())
2750  "no matrix hardware on the target device, "
2751  "joint_matrix_mad is not supported");
2752 
2753  ByteArray JointMatrixMadByteArray =
2754  DeviceBinaryProperty(*(JointMatrixMadPropIt.value())).asByteArray();
2755  // Drop 8 bytes describing the size of the byte array.
2756  JointMatrixMadByteArray.dropBytes(8);
2757  std::string JointMatrixMadByteArrayToStr;
2758  while (!JointMatrixMadByteArray.empty()) {
2759  JointMatrixMadByteArrayToStr += JointMatrixMadByteArray.consume<char>();
2760  }
2761  std::optional<sycl::exception> Result = checkDevSupportJointMatrixMad(
2762  JointMatrixMadByteArrayToStr, Combinations);
2763  if (Result)
2764  return Result.value();
2765  }
2766 
2767  // Checking if device supports defined required work group size
2768  if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2770  // stores its values as uint32_t, but this needed to be expanded to
2771  // uint64_t. However, this change did not happen in ABI-breaking
2772  // window, so we attach the required work-group size as the
2773  // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
2774  // window, we can remove the logic for the 32 bit property.
2775  bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
2776  auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2777 
2778  ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray();
2779  // Drop 8 bytes describing the size of the byte array.
2780  ReqdWGSize.dropBytes(8);
2781  uint64_t ReqdWGSizeAllDimsTotal = 1;
2782  std::vector<uint64_t> ReqdWGSizeVec;
2783  int Dims = 0;
2784  while (!ReqdWGSize.empty()) {
2785  uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume<uint64_t>()
2786  : ReqdWGSize.consume<uint32_t>();
2787  if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal,
2788  SingleDimSize))
2789  ReqdWGSizeAllDimsTotal = *res;
2790  else
2791  return sycl::exception(
2792  sycl::errc::kernel_not_supported,
2793  "Required work-group size is not supported"
2794  " (total number of work-items requested can't fit into size_t)");
2795  ReqdWGSizeVec.push_back(SingleDimSize);
2796  Dims++;
2797  }
2798 
2799  if (NDRDesc.Dims != 0 && NDRDesc.Dims != static_cast<size_t>(Dims))
2800  return sycl::exception(
2802  "The local size dimension of submitted nd_range doesn't match the "
2803  "required work-group size dimension");
2804 
2805  // The SingleDimSize was computed in an uint64_t; size_t does not
2806  // necessarily have to be the same uint64_t (but should fit in an
2807  // uint64_t).
2808  if (ReqdWGSizeAllDimsTotal >
2809  Dev.get_info<info::device::max_work_group_size>())
2810  return sycl::exception(sycl::errc::kernel_not_supported,
2811  "Required work-group size " +
2812  std::to_string(ReqdWGSizeAllDimsTotal) +
2813  " is not supported on the device");
2814  // Creating std::variant to call max_work_item_sizes one time to avoid
2815  // performance drop
2816  std::variant<id<1>, id<2>, id<3>> MaxWorkItemSizesVariant;
2817  if (Dims == 1)
2818  MaxWorkItemSizesVariant =
2820  else if (Dims == 2)
2821  MaxWorkItemSizesVariant =
2823  else // (Dims == 3)
2824  MaxWorkItemSizesVariant =
2826  for (int i = 0; i < Dims; i++) {
2827  // Extracting value from std::variant to avoid dealing with type-safety
2828  // issues after that
2829  if (Dims == 1) {
2830  // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
2831  if (ReqdWGSizeVec[i] >
2832  std::get<id<1>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2833  return sycl::exception(sycl::errc::kernel_not_supported,
2834  "Required work-group size " +
2835  std::to_string(ReqdWGSizeVec[i]) +
2836  " is not supported");
2837  } else if (Dims == 2) {
2838  if (ReqdWGSizeVec[i] >
2839  std::get<id<2>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2840  return sycl::exception(sycl::errc::kernel_not_supported,
2841  "Required work-group size " +
2842  std::to_string(ReqdWGSizeVec[i]) +
2843  " is not supported");
2844  } else // (Dims == 3)
2845  if (ReqdWGSizeVec[i] >
2846  std::get<id<3>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2847  return sycl::exception(sycl::errc::kernel_not_supported,
2848  "Required work-group size " +
2849  std::to_string(ReqdWGSizeVec[i]) +
2850  " is not supported");
2851  }
2852  }
2853 
2854  // Check if device supports required sub-group size.
2855  if (ReqdSubGroupSizePropIt) {
2856  auto ReqdSubGroupSize =
2857  DeviceBinaryProperty(*(ReqdSubGroupSizePropIt.value())).asUint32();
2858  auto SupportedSubGroupSizes = Dev.get_info<info::device::sub_group_sizes>();
2859  // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
2860  // as ESIMD images have a reqd-sub-group-size of 1, but currently
2861  // no backend currently includes 1 as a valid sub-group size.
2862  // This can be removed if backends add 1 as a valid sub-group size.
2863  if (!getUint32PropAsBool(Img, "isEsimdImage") &&
2864  std::none_of(SupportedSubGroupSizes.cbegin(),
2865  SupportedSubGroupSizes.cend(),
2866  [=](auto s) { return s == ReqdSubGroupSize; }))
2867  return sycl::exception(sycl::errc::kernel_not_supported,
2868  "Sub-group size " +
2869  std::to_string(ReqdSubGroupSize) +
2870  " is not supported on the device");
2871  }
2872 
2873  return {};
2874 }
2875 
2876 } // namespace detail
2877 } // namespace _V1
2878 } // namespace sycl
2879 
2881  sycl::detail::ProgramManager::getInstance().addImages(desc);
2882 }
2883 
2884 // Executed as a part of current module's (.exe, .dll) static initialization
2886  (void)desc;
2887  // TODO implement the function
2888 }
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:152
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:148
void dropBytes(std::size_t Bytes)
static GlobalHandler & instance()
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(sycl::detail::pi::PiProgram Program, const std::string &KernelName)
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
std::pair< sycl::detail::pi::PiKernel, const KernelArgMask * > KernelArgMaskPairT
static constexpr const char * DirSep
Definition: os_util.hpp:55
static std::string getCurrentDSODir()
Returns an absolute path to a directory where the object was found.
static std::vector< std::vector< char > > getItemFromDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString)
static void putItemToDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, const sycl::detail::pi::PiProgram &NativePrg)
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
void addImages(pi_device_binaries DeviceImages)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
bool kernelUsesAssert(const std::string &KernelName) const
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg=nullptr, const RTDeviceBinaryImage *Img=nullptr)
Resolves given program to a device binary image and requests the program to flush constants the image...
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
bool hasCompatibleImage(const device &Dev)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
virtual void dump(std::ostream &Out) const
const pi_device_binary_struct & getRawData() const
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
const PropertyRange & getProgramMetadata() const
const PropertyRange & getDeviceRequirements() const
const PropertyRange & getDeviceLibReqMask() const
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
static const char * get()
Definition: config.hpp:115
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
void flush_spec_constants(const RTDeviceBinaryImage &Img, sycl::detail::pi::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
cl_program get() const
Returns a valid cl_program instance.
sycl::detail::pi::PiProgram & getHandleRef()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:223
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect)
Indicates if the SYCL device has the given feature.
Definition: device.cpp:219
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the class identify kernel is some kernel_bundle related APIs.
detail::is_platform_info_desc< Param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.hpp:184
Objects of the property_list class are containers for the SYCL properties.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
::pi_device PiDevice
Definition: pi.hpp:131
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: pi.cpp:626
::pi_kernel PiKernel
Definition: pi.hpp:138
::pi_device_binary_type PiDeviceBinaryType
Definition: pi.hpp:134
::pi_program PiProgram
Definition: pi.hpp:137
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
std::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
Definition: impl_utils.hpp:40
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, bool JITCompilationIsRequired)
static bool isDeviceBinaryTypeSupported(const context &C, sycl::detail::pi::PiDeviceBinaryType Format)
static constexpr int DbgProgMgr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::optional< matrix_ext::matrix_type > convertMatrixTypeStringMatrixTypeEnumValue(const std::string &MatrixTypeString)
static void enableITTAnnotationsIfNeeded(const sycl::detail::pi::PiProgram &Prog, const PluginPtr &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
std::vector< bool > KernelArgMask
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
static sycl::detail::pi::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const sycl::detail::pi::PiDevice &Device, bool UseNativeLib)
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, const char *PropName)
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
static constexpr uint32_t ITTSpecConstId
KernelArgMask createKernelArgMask(const ByteArray &Bytes)
static std::string getAspectNameStr(sycl::aspect AspectNum)
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static void appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
std::optional< sycl::exception > checkDevSupportJointMatrix(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
constexpr std::optional< ext::oneapi::experimental::matrix::use > convertMatrixUseStringToEnum(const char *UseString)
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const PluginPtr &)
RTDeviceBinaryImage * getBinImageFromMultiMap(const std::unordered_multimap< StorageKey, RTDeviceBinaryImage * > &ImagesSet, const StorageKey &Key, const context &Context, const device &Device)
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, sycl::detail::pi::PiProgram Prog, const PluginPtr &Plugin)
std::shared_ptr< device_impl > DeviceImplPtr
std::optional< sycl::exception > checkDevSupportJointMatrixMad(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const PluginPtr &Plugin)
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser, size_t RowsUser, size_t ColsUser, matrix_ext::matrix_type MatrixTypeRuntime, size_t MaxRowsRuntime, size_t MaxColsRuntime, size_t RowsRuntime, size_t ColsRuntime)
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
static sycl::detail::pi::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
static std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > multiply_with_overflow_check(T x, T y)
static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts)
static void emitBuiltProgramInfo(const pi_program &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, sycl::detail::pi::PiProgram &Prog)
static const char * getFormatStr(sycl::detail::pi::PiDeviceBinaryType Format)
static void appendCompileEnvironmentVariablesThatAppend(std::string &CompileOpts)
std::vector< unsigned char > SerializedObj
Definition: util.hpp:69
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
static std::vector< sycl::detail::pi::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask)
static sycl::detail::pi::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: helpers.cpp:22
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
auto autodecltype(a) b
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
autodecltype(x) x
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: access.hpp:18
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_cuda.cpp:341
int32_t pi_int32
Definition: pi.h:212
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:310
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: pi.h:948
_pi_result
Definition: pi.h:224
pi_uint32 pi_bool
Definition: pi.h:215
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:403
@ PI_DEVICE_INFO_NAME
Definition: pi.h:367
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
Definition: pi.h:943
pi_result piProgramRetain(pi_program program)
Definition: pi_cuda.cpp:320
const pi_bool PI_TRUE
Definition: pi.h:684
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1603
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:78
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_cuda.cpp:526
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:970
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_cuda.cpp:1060
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_cuda.cpp:1053
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:272
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
uint32_t pi_uint32
Definition: pi.h:213
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
Definition: pi.h:946
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:971
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_cuda.cpp:248
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:513
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:482
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_cuda.cpp:253
@ PI_PROGRAM_INFO_DEVICES
Definition: pi.h:471
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:366
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:124
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_cuda.cpp:93
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
Definition: pi.h:941
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:972
@ PI_PROGRAM_BUILD_INFO_LOG
Definition: pi.h:251
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
Definition: pi.h:898
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:1122
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: pi.h:1128
pi_device_binary DeviceBinaries
Device binaries data.
Definition: pi.h:1130
This struct is a record of the device binary information.
Definition: pi.h:1025
_pi_offload_entry EntriesEnd
Definition: pi.h:1062
_pi_offload_entry EntriesBegin
the offload entry table
Definition: pi.h:1061
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: pi.h:1059
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:1045
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: pi.h:1057