DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/compiler.hpp>
10 #include <detail/config.hpp>
11 #include <detail/context_impl.hpp>
13 #include <detail/device_impl.hpp>
14 #include <detail/event_impl.hpp>
17 #include <detail/platform_impl.hpp>
19 #include <detail/queue_impl.hpp>
21 #include <detail/split_string.hpp>
22 #include <sycl/aspects.hpp>
23 #include <sycl/backend_types.hpp>
24 #include <sycl/context.hpp>
25 #include <sycl/detail/common.hpp>
27 #include <sycl/detail/os_util.hpp>
29 #include <sycl/detail/ur.hpp>
30 #include <sycl/detail/util.hpp>
31 #include <sycl/device.hpp>
32 #include <sycl/exception.hpp>
33 
35 
36 #include <algorithm>
37 #include <cassert>
38 #include <cstdint>
39 #include <cstdlib>
40 #include <cstring>
41 #include <fstream>
42 #include <memory>
43 #include <mutex>
44 #include <sstream>
45 #include <string>
46 #include <variant>
47 
48 namespace sycl {
49 inline namespace _V1 {
50 namespace detail {
51 
52 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
53 
54 static constexpr int DbgProgMgr = 0;
55 
56 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
57 
60 static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog,
61  const PluginPtr &Plugin) {
63  constexpr char SpecValue = 1;
64  ur_specialization_constant_info_t SpecConstInfo = {
65  ITTSpecConstId, sizeof(char), &SpecValue};
66  Plugin->call(urProgramSetSpecializationConstants, Prog, 1, &SpecConstInfo);
67  }
68 }
69 
72 }
73 
74 static ur_program_handle_t
75 createBinaryProgram(const ContextImplPtr Context, const device &Device,
76  const unsigned char *Data, size_t DataLen,
77  const std::vector<ur_program_metadata_t> Metadata) {
78  const PluginPtr &Plugin = Context->getPlugin();
79 #ifndef _NDEBUG
80  uint32_t NumDevices = 0;
81  Plugin->call(urContextGetInfo, Context->getHandleRef(),
82  UR_CONTEXT_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices,
83  /*param_value_size_ret=*/nullptr);
84  assert(NumDevices > 0 &&
85  "Only a single device is supported for AOT compilation");
86 #endif
87 
88  ur_program_handle_t Program;
89  ur_device_handle_t UrDevice = getSyclObjImpl(Device)->getHandleRef();
90  ur_result_t BinaryStatus = UR_RESULT_SUCCESS;
91  ur_program_properties_t Properties = {};
92  Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES;
93  Properties.pNext = nullptr;
94  Properties.count = Metadata.size();
95  Properties.pMetadatas = Metadata.data();
96  Plugin->call(urProgramCreateWithBinary, Context->getHandleRef(), UrDevice,
97  DataLen, Data, &Properties, &Program);
98 
99  if (BinaryStatus != UR_RESULT_SUCCESS) {
100  throw detail::set_ur_error(
102  "Creating program with binary failed."),
103  BinaryStatus);
104  }
105 
106  return Program;
107 }
108 
109 static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context,
110  const unsigned char *Data,
111  size_t DataLen) {
112  ur_program_handle_t Program = nullptr;
113  const PluginPtr &Plugin = Context->getPlugin();
114  Plugin->call(urProgramCreateWithIL, Context->getHandleRef(), Data, DataLen,
115  nullptr, &Program);
116  return Program;
117 }
118 
119 // TODO replace this with a new UR API function
120 static bool
122  ur::DeviceBinaryType Format) {
123  // All formats except SYCL_DEVICE_BINARY_TYPE_SPIRV are supported.
124  if (Format != SYCL_DEVICE_BINARY_TYPE_SPIRV)
125  return true;
126 
127  const backend ContextBackend = detail::getSyclObjImpl(C)->getBackend();
128 
129  // The CUDA backend cannot use SPIR-V
130  if (ContextBackend == backend::ext_oneapi_cuda)
131  return false;
132 
133  std::vector<device> Devices = C.get_devices();
134 
135  // Program type is SPIR-V, so we need a device compiler to do JIT.
136  for (const device &D : Devices) {
137  if (!D.get_info<info::device::is_compiler_available>())
138  return false;
139  }
140 
141  // OpenCL 2.1 and greater require clCreateProgramWithIL
142  if (ContextBackend == backend::opencl) {
143  std::string ver = C.get_platform().get_info<info::platform::version>();
144  if (ver.find("OpenCL 1.0") == std::string::npos &&
145  ver.find("OpenCL 1.1") == std::string::npos &&
146  ver.find("OpenCL 1.2") == std::string::npos &&
147  ver.find("OpenCL 2.0") == std::string::npos)
148  return true;
149  }
150 
151  for (const device &D : Devices) {
152  // We need cl_khr_il_program extension to be present
153  // and we can call clCreateProgramWithILKHR using the extension
154  std::vector<std::string> Extensions =
155  D.get_info<info::device::extensions>();
156  if (Extensions.end() ==
157  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
158  return false;
159  }
160 
161  return true;
162 }
163 
164 // getFormatStr is used for debug-printing, so it may be unused.
165 [[maybe_unused]] static const char *getFormatStr(ur::DeviceBinaryType Format) {
166  switch (Format) {
168  return "none";
170  return "native";
172  return "SPIR-V";
174  return "LLVM IR";
175  }
176  assert(false && "Unknown device image format");
177  return "unknown";
178 }
179 
180 ur_program_handle_t
182  const context &Context, const device &Device) {
183  if constexpr (DbgProgMgr > 0)
184  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
185  << getSyclObjImpl(Context).get() << ", "
186  << getSyclObjImpl(Device).get() << ")\n";
187  const sycl_device_binary_struct &RawImg = Img.getRawData();
188 
189  // perform minimal sanity checks on the device image and the descriptor
190  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
192  "Malformed device program image descriptor");
193  }
194  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
196  "Invalid device program image: size is zero");
197  }
198  size_t ImgSize = Img.getSize();
199 
200  // TODO if the binary image is a part of the fat binary, the clang
201  // driver should have set proper format option to the
202  // clang-offload-wrapper. The fix depends on AOT compilation
203  // implementation, so will be implemented together with it.
204  // Img->Format can't be updated as it is inside of the in-memory
205  // OS module binary.
206  ur::DeviceBinaryType Format = Img.getFormat();
207 
208  if (Format == SYCL_DEVICE_BINARY_TYPE_NONE)
209  Format = ur::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
210  // sycl::detail::pi::PiDeviceBinaryType Format = Img->Format;
211  // assert(Format != SYCL_DEVICE_BINARY_TYPE_NONE && "Image format not set");
212 
213  if (!isDeviceBinaryTypeSupported(Context, Format))
214  throw sycl::exception(
215  sycl::errc::feature_not_supported,
216  "SPIR-V online compilation is not supported in this context");
217 
218  // Get program metadata from properties
219  auto ProgMetadata = Img.getProgramMetadataUR();
220 
221  // Load the image
222  const ContextImplPtr Ctx = getSyclObjImpl(Context);
223  ur_program_handle_t Res =
225  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
226  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize,
227  ProgMetadata);
228 
229  {
230  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
231  // associate the UR program with the image it was created for
232  NativePrograms.insert({Res, &Img});
233  }
234 
235  Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
236 
237  if constexpr (DbgProgMgr > 1)
238  std::cerr << "created program: " << Res
239  << "; image format: " << getFormatStr(Format) << "\n";
240 
241  return Res;
242 }
243 
244 static void appendLinkOptionsFromImage(std::string &LinkOpts,
245  const RTDeviceBinaryImage &Img) {
246  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
247  // Update only if link options are not overwritten by environment variable
248  if (!LinkOptsEnv) {
249  const char *TemporaryStr = Img.getLinkOptions();
250  if (TemporaryStr != nullptr) {
251  if (!LinkOpts.empty())
252  LinkOpts += " ";
253  LinkOpts += std::string(TemporaryStr);
254  }
255  }
256 }
257 
259  const char *PropName) {
260  sycl_device_binary_property Prop = Img.getProperty(PropName);
261  return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
262 }
263 
264 static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img,
265  const char *PropName) {
266  sycl_device_binary_property Prop = Img.getProperty(PropName);
267  std::stringstream ss;
268  if (!Prop)
269  return "";
270  int optLevel = DeviceBinaryProperty(Prop).asUint32();
271  if (optLevel < 0 || optLevel > 3)
272  return "";
273  ss << "-O" << optLevel;
274  std::string temp = ss.str();
275  return temp;
276 }
277 
278 static void
280  const RTDeviceBinaryImage &Img,
281  bool IsEsimdImage) {
282  // TODO: sycl-register-alloc-mode is deprecated and should be removed in the
283  // next ABI break.
284  sycl_device_binary_property RegAllocModeProp =
285  Img.getProperty("sycl-register-alloc-mode");
286  sycl_device_binary_property GRFSizeProp = Img.getProperty("sycl-grf-size");
287 
288  if (!RegAllocModeProp && !GRFSizeProp)
289  return;
290  // The mutual exclusivity of these properties should have been checked in
291  // sycl-post-link.
292  assert(!RegAllocModeProp || !GRFSizeProp);
293  bool Is256GRF = false;
294  bool IsAutoGRF = false;
295  if (RegAllocModeProp) {
296  uint32_t RegAllocModePropVal =
297  DeviceBinaryProperty(RegAllocModeProp).asUint32();
298  Is256GRF = RegAllocModePropVal ==
299  static_cast<uint32_t>(register_alloc_mode_enum::large);
300  IsAutoGRF = RegAllocModePropVal ==
301  static_cast<uint32_t>(register_alloc_mode_enum::automatic);
302  } else {
303  assert(GRFSizeProp);
304  uint32_t GRFSizePropVal = DeviceBinaryProperty(GRFSizeProp).asUint32();
305  Is256GRF = GRFSizePropVal == 256;
306  IsAutoGRF = GRFSizePropVal == 0;
307  }
308  if (Is256GRF) {
309  if (!CompileOpts.empty())
310  CompileOpts += " ";
311  // This option works for both LO AND OCL backends.
312  CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file";
313  }
314  if (IsAutoGRF) {
315  if (!CompileOpts.empty())
316  CompileOpts += " ";
317  // This option works for both LO AND OCL backends.
318  CompileOpts += "-ze-intel-enable-auto-large-GRF-mode";
319  }
320 }
321 
322 static void appendCompileOptionsFromImage(std::string &CompileOpts,
323  const RTDeviceBinaryImage &Img,
324  const std::vector<device> &Devs,
325  const PluginPtr &) {
326  // Build options are overridden if environment variables are present.
327  // Environment variables are not changed during program lifecycle so it
328  // is reasonable to use static here to read them only once.
329  static const char *CompileOptsEnv =
331  // Update only if compile options are not overwritten by environment
332  // variable
333  if (!CompileOptsEnv) {
334  if (!CompileOpts.empty())
335  CompileOpts += " ";
336  const char *TemporaryStr = Img.getCompileOptions();
337  if (TemporaryStr != nullptr)
338  CompileOpts += std::string(TemporaryStr);
339  }
340  bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
341  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
342  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
343  if (isEsimdImage) {
344  if (!CompileOpts.empty())
345  CompileOpts += " ";
346  CompileOpts += "-vc-codegen";
347  // Allow warning and performance hints from vc/finalizer if the RT warning
348  // level is at least 1.
350  CompileOpts += " -disable-finalizer-msg";
351  }
352 
353  appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage);
354 
355  const auto PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform());
356 
357  // Add optimization flags.
358  auto str = getUint32PropAsOptStr(Img, "optLevel");
359  const char *optLevelStr = str.c_str();
360  // TODO: Passing these options to vector compiler causes build failure in
361  // backend. Will pass the flags once backend compilation issue is resolved.
362  // Update only if compile options are not overwritten by environment
363  // variable.
364  if (!isEsimdImage && !CompileOptsEnv && optLevelStr != nullptr &&
365  optLevelStr[0] != '\0') {
366  // Making sure all devices have the same platform.
367  assert(!Devs.empty() &&
368  std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
369  return Dev.get_platform() == Devs[0].get_platform();
370  }));
371  const char *backend_option = nullptr;
372  // Empty string is returned in backend_option when no appropriate backend
373  // option is available for a given frontend option.
374  PlatformImpl->getBackendOption(optLevelStr, &backend_option);
375  if (backend_option && backend_option[0] != '\0') {
376  if (!CompileOpts.empty())
377  CompileOpts += " ";
378  CompileOpts += std::string(backend_option);
379  }
380  }
381  bool IsIntelGPU =
382  (PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
383  PlatformImpl->getBackend() == backend::opencl) &&
384  std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) {
385  return Dev.is_gpu() &&
386  Dev.get_info<info::device::vendor_id>() == 0x8086;
387  });
388  if (!CompileOptsEnv) {
389  static const char *TargetCompileFast = "-ftarget-compile-fast";
390  if (auto Pos = CompileOpts.find(TargetCompileFast);
391  Pos != std::string::npos) {
392  const char *BackendOption = nullptr;
393  if (IsIntelGPU)
394  PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
395  auto OptLen = strlen(TargetCompileFast);
396  if (IsIntelGPU && BackendOption && BackendOption[0] != '\0')
397  CompileOpts.replace(Pos, OptLen, BackendOption);
398  else
399  CompileOpts.erase(Pos, OptLen);
400  }
401  static const std::string TargetRegisterAllocMode =
402  "-ftarget-register-alloc-mode=";
403  auto OptPos = CompileOpts.find(TargetRegisterAllocMode);
404  while (OptPos != std::string::npos) {
405  auto EndOfOpt = CompileOpts.find(" ", OptPos);
406  // Extract everything after the equals until the end of the option
407  auto OptValue = CompileOpts.substr(
408  OptPos + TargetRegisterAllocMode.size(),
409  EndOfOpt - OptPos - TargetRegisterAllocMode.size());
410  auto ColonPos = OptValue.find(":");
411  auto Device = OptValue.substr(0, ColonPos);
412  std::string BackendStrToAdd;
413  bool IsPVC =
414  std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) {
415  return IsIntelGPU &&
416  (Dev.get_info<ext::intel::info::device::device_id>() &
417  0xFF00) == 0x0B00;
418  });
419  // Currently 'pvc' is the only supported device.
420  if (Device == "pvc" && IsPVC)
421  BackendStrToAdd = " " + OptValue.substr(ColonPos + 1) + " ";
422 
423  // Extract everything before this option
424  std::string NewCompileOpts =
425  CompileOpts.substr(0, OptPos) + BackendStrToAdd;
426  // Extract everything after this option and add it to the above.
427  if (EndOfOpt != std::string::npos)
428  NewCompileOpts += CompileOpts.substr(EndOfOpt);
429  CompileOpts = NewCompileOpts;
430  OptPos = CompileOpts.find(TargetRegisterAllocMode);
431  }
432  }
433 }
434 
435 static void
437  static const char *AppendCompileOptsEnv =
439  if (AppendCompileOptsEnv) {
440  if (!CompileOpts.empty())
441  CompileOpts += " ";
442  CompileOpts += AppendCompileOptsEnv;
443  }
444 }
445 static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts) {
446  static const char *AppendLinkOptsEnv =
448  if (AppendLinkOptsEnv) {
449  if (!LinkOpts.empty())
450  LinkOpts += " ";
451  LinkOpts += AppendLinkOptsEnv;
452  }
453 }
454 
455 static void applyOptionsFromImage(std::string &CompileOpts,
456  std::string &LinkOpts,
457  const RTDeviceBinaryImage &Img,
458  const std::vector<device> &Devices,
459  const PluginPtr &Plugin) {
460  appendCompileOptionsFromImage(CompileOpts, Img, Devices, Plugin);
461  appendLinkOptionsFromImage(LinkOpts, Img);
462 }
463 
464 static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
465  // Environment variables are not changed during program lifecycle so it
466  // is reasonable to use static here to read them only once.
467  static const char *CompileOptsEnv =
469  if (CompileOptsEnv) {
470  CompileOpts = CompileOptsEnv;
471  }
472 }
473 
474 static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
475  // Environment variables are not changed during program lifecycle so it
476  // is reasonable to use static here to read them only once.
477  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
478  if (LinkOptsEnv) {
479  LinkOpts = LinkOptsEnv;
480  }
481 }
482 
483 static void applyOptionsFromEnvironment(std::string &CompileOpts,
484  std::string &LinkOpts) {
485  // Build options are overridden if environment variables are present.
488 }
489 
490 std::pair<ur_program_handle_t, bool> ProgramManager::getOrCreateURProgram(
491  const RTDeviceBinaryImage &MainImg,
492  const std::vector<const RTDeviceBinaryImage *> &AllImages,
493  const context &Context, const device &Device,
494  const std::string &CompileAndLinkOptions, SerializedObj SpecConsts) {
495  ur_program_handle_t NativePrg; // TODO: Or native?
496 
498  Device, AllImages, SpecConsts, CompileAndLinkOptions);
499  if (BinProg.size()) {
500  // Get program metadata from properties
501  std::vector<ur_program_metadata_t> ProgMetadataVector;
502  for (const RTDeviceBinaryImage *Img : AllImages) {
503  auto ProgMetadata = Img->getProgramMetadata();
504  for (const auto &Prop : ProgMetadata) {
505  ProgMetadataVector.push_back(
507  }
508  }
509  // TODO: Build for multiple devices once supported by program manager
510  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
511  (const unsigned char *)BinProg[0].data(),
512  BinProg[0].size(), ProgMetadataVector);
513  } else {
514  NativePrg = createURProgram(MainImg, Context, Device);
515  }
516  return {NativePrg, BinProg.size()};
517 }
518 
521 static void emitBuiltProgramInfo(const ur_program_handle_t &Prog,
522  const ContextImplPtr &Context) {
524  std::string ProgramBuildLog =
525  ProgramManager::getProgramBuildLog(Prog, Context);
526  std::clog << ProgramBuildLog << std::endl;
527  }
528 }
529 
530 static const char *getUrDeviceTarget(const char *URDeviceTarget) {
531  if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_UNKNOWN) == 0)
532  return UR_DEVICE_BINARY_TARGET_UNKNOWN;
533  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV32) == 0)
534  return UR_DEVICE_BINARY_TARGET_SPIRV32;
535  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0)
536  return UR_DEVICE_BINARY_TARGET_SPIRV64;
537  else if (strcmp(URDeviceTarget,
539  return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64;
540  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) ==
541  0)
542  return UR_DEVICE_BINARY_TARGET_SPIRV64_GEN;
543  else if (strcmp(URDeviceTarget,
545  return UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA;
546  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0)
547  return UR_DEVICE_BINARY_TARGET_NVPTX64;
548  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0)
549  return UR_DEVICE_BINARY_TARGET_AMDGCN;
550  else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) ==
551  0)
552  return "native_cpu"; // todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU;
553 
554  return UR_DEVICE_BINARY_TARGET_UNKNOWN;
555 }
556 
558  const device &Dev) {
559  const std::shared_ptr<detail::device_impl> &DeviceImpl =
561  auto &Plugin = DeviceImpl->getPlugin();
562 
563  const ur_device_handle_t &URDeviceHandle = DeviceImpl->getHandleRef();
564 
565  // Call urDeviceSelectBinary with only one image to check if an image is
566  // compatible with implementation. The function returns invalid index if no
567  // device images are compatible.
568  uint32_t SuitableImageID = std::numeric_limits<uint32_t>::max();
569  sycl_device_binary DevBin =
570  const_cast<sycl_device_binary>(&BinImage->getRawData());
571 
572  ur_device_binary_t UrBinary{};
573  UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec);
574 
575  ur_result_t Error =
576  Plugin->call_nocheck(urDeviceSelectBinary, URDeviceHandle, &UrBinary,
577  /*num bin images = */ (uint32_t)1, &SuitableImageID);
578  if (Error != UR_RESULT_SUCCESS && Error != UR_RESULT_ERROR_INVALID_BINARY)
580  "Invalid binary image or device"),
581  Error);
582 
583  return (0 == SuitableImageID);
584 }
585 
586 std::set<RTDeviceBinaryImage *>
587 ProgramManager::collectDeviceImageDepsForImportedSymbols(
588  const RTDeviceBinaryImage &MainImg, device Dev) {
589  std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
590  std::set<std::string> HandledSymbols;
591  std::queue<std::string> WorkList;
592  for (const sycl_device_binary_property &ISProp :
593  MainImg.getImportedSymbols()) {
594  WorkList.push(ISProp->Name);
595  HandledSymbols.insert(ISProp->Name);
596  }
597  ur::DeviceBinaryType Format = MainImg.getFormat();
598  if (!WorkList.empty() && Format != SYCL_DEVICE_BINARY_TYPE_SPIRV)
600  "Dynamic linking is not supported for AOT compilation yet");
601  while (!WorkList.empty()) {
602  std::string Symbol = WorkList.front();
603  WorkList.pop();
604 
605  auto Range = m_ExportedSymbolImages.equal_range(Symbol);
606  bool Found = false;
607  for (auto It = Range.first; It != Range.second; ++It) {
608  RTDeviceBinaryImage *Img = It->second;
609  if (Img->getFormat() != Format ||
610  !doesDevSupportDeviceRequirements(Dev, *Img) ||
611  !compatibleWithDevice(Img, Dev))
612  continue;
613  DeviceImagesToLink.insert(Img);
614  Found = true;
615  for (const sycl_device_binary_property &ISProp :
616  Img->getImportedSymbols()) {
617  if (HandledSymbols.insert(ISProp->Name).second)
618  WorkList.push(ISProp->Name);
619  }
620  break;
621  }
622  if (!Found)
624  "No device image found for external symbol " +
625  Symbol);
626  }
627  DeviceImagesToLink.erase(const_cast<RTDeviceBinaryImage *>(&MainImg));
628  return DeviceImagesToLink;
629 }
630 
631 std::set<RTDeviceBinaryImage *>
632 ProgramManager::collectDependentDeviceImagesForVirtualFunctions(
633  const RTDeviceBinaryImage &Img, device Dev) {
634  // If virtual functions are used in a program, then we need to link several
635  // device images together to make sure that vtable pointers stored in
636  // objects are valid between different kernels (which could be in different
637  // device images).
638  std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
639  // KernelA may use some set-a, which is also used by KernelB that in turn
640  // uses set-b, meaning that this search should be recursive. The set below
641  // is used to stop that recursion, i.e. to avoid looking at sets we have
642  // already seen.
643  std::set<std::string> HandledSets;
644  std::queue<std::string> WorkList;
645  for (const sycl_device_binary_property &VFProp : Img.getVirtualFunctions()) {
646  std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
647  // Device image passed to this function is expected to contain SYCL kernels
648  // and therefore it may only use virtual function sets, but cannot provide
649  // them. We expect to see just a single property here
650  assert(std::string(VFProp->Name) == "uses-virtual-functions-set" &&
651  "Unexpected virtual function property");
652  for (const auto &SetName : detail::split_string(StrValue, ',')) {
653  WorkList.push(SetName);
654  HandledSets.insert(SetName);
655  }
656  }
657 
658  while (!WorkList.empty()) {
659  std::string SetName = WorkList.front();
660  WorkList.pop();
661 
662  // There could be more than one device image that uses the same set
663  // of virtual functions, or provides virtual funtions from the same
664  // set.
665  for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage[SetName]) {
666  // Here we can encounter both uses-virtual-functions-set and
667  // virtual-functions-set properties, but their handling is the same: we
668  // just grab all sets they reference and add them for consideration if
669  // we haven't done so already.
670  for (const sycl_device_binary_property &VFProp :
671  BinImage->getVirtualFunctions()) {
672  std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
673  for (const auto &SetName : detail::split_string(StrValue, ',')) {
674  if (HandledSets.insert(SetName).second)
675  WorkList.push(SetName);
676  }
677  }
678 
679  // TODO: Complete this part about handling of incompatible device images.
680  // If device image uses the same virtual function set, then we only
681  // link it if it is compatible.
682  // However, if device image provides virtual function set and it is
683  // incompatible, then we should link its "dummy" version to avoid link
684  // errors about unresolved external symbols.
685  if (doesDevSupportDeviceRequirements(Dev, *BinImage))
686  DeviceImagesToLink.insert(BinImage);
687  }
688  }
689 
690  // We may have inserted the original image into the list as well, because it
691  // is also a part of m_VFSet2BinImage map. No need to to return it to avoid
692  // passing it twice to link call later.
693  DeviceImagesToLink.erase(const_cast<RTDeviceBinaryImage *>(&Img));
694 
695  return DeviceImagesToLink;
696 }
697 
698 static void
699 setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
700  ur_program_handle_t Prog, const PluginPtr &Plugin) {
701  // Set ITT annotation specialization constant if needed.
702  enableITTAnnotationsIfNeeded(Prog, Plugin);
703 
704  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
705  const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
706  &SpecConstData = InputImpl->get_spec_const_data_ref();
707  const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
708 
709  // Set all specialization IDs from descriptors in the input device image.
710  for (const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
711  std::ignore = SpecConstNames;
712  for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) {
713  if (SpecIDDesc.IsSet) {
714  ur_specialization_constant_info_t SpecConstInfo = {
715  SpecIDDesc.ID, SpecIDDesc.Size,
716  SpecConsts.data() + SpecIDDesc.BlobOffset};
717  Plugin->call(urProgramSetSpecializationConstants, Prog, 1,
718  &SpecConstInfo);
719  }
720  }
721  }
722 }
723 
724 // When caching is enabled, the returned UrProgram will already have
725 // its ref count incremented.
727  const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl,
728  const std::string &KernelName, const NDRDescT &NDRDesc,
729  bool JITCompilationIsRequired) {
730  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
731 
732  std::string CompileOpts;
733  std::string LinkOpts;
734 
735  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
736 
737  SerializedObj SpecConsts;
738 
739  // Check if we can optimize program builds for sub-devices by using a program
740  // built for the root device
741  DeviceImplPtr RootDevImpl = DeviceImpl;
742  while (!RootDevImpl->isRootDevice()) {
743  auto ParentDev = detail::getSyclObjImpl(
744  RootDevImpl->get_info<info::device::parent_device>());
745  // Sharing is allowed within a single context only
746  if (!ContextImpl->hasDevice(ParentDev))
747  break;
748  RootDevImpl = ParentDev;
749  }
750 
751  ur_bool_t MustBuildOnSubdevice = true;
752  ContextImpl->getPlugin()->call(urDeviceGetInfo, RootDevImpl->getHandleRef(),
753  UR_DEVICE_INFO_BUILD_ON_SUBDEVICE,
754  sizeof(ur_bool_t), &MustBuildOnSubdevice,
755  nullptr);
756 
757  DeviceImplPtr Dev = (MustBuildOnSubdevice == true) ? DeviceImpl : RootDevImpl;
758  auto Context = createSyclObjFromImpl<context>(ContextImpl);
759  auto Device = createSyclObjFromImpl<device>(Dev);
760  const RTDeviceBinaryImage &Img =
761  getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
762 
763  // Check that device supports all aspects used by the kernel
764  if (auto exception = checkDevSupportDeviceRequirements(Device, Img, NDRDesc))
765  throw *exception;
766 
767  // TODO collecting dependencies for virtual functions and imported symbols
768  // should be combined since one can lead to new unresolved dependencies for
769  // the other.
770  std::set<RTDeviceBinaryImage *> DeviceImagesToLink =
771  collectDependentDeviceImagesForVirtualFunctions(Img, Device);
772 
773  std::set<RTDeviceBinaryImage *> ImageDeps =
774  collectDeviceImageDepsForImportedSymbols(Img, Device);
775  DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end());
776 
777  std::vector<const RTDeviceBinaryImage *> AllImages;
778  AllImages.reserve(ImageDeps.size() + 1);
779  AllImages.push_back(&Img);
780  std::copy(ImageDeps.begin(), ImageDeps.end(), std::back_inserter(AllImages));
781 
782  auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, &CompileOpts,
783  &LinkOpts, SpecConsts, &DeviceImagesToLink, &AllImages] {
784  const PluginPtr &Plugin = ContextImpl->getPlugin();
785  applyOptionsFromImage(CompileOpts, LinkOpts, Img, {Device}, Plugin);
786  // Should always come last!
789  auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram(
790  Img, AllImages, Context, Device, CompileOpts + LinkOpts, SpecConsts);
791 
792  if (!DeviceCodeWasInCache) {
793  if (Img.supportsSpecConstants())
794  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
795  }
796 
797  ProgramPtr ProgramManaged(NativePrg, urProgramRelease);
798 
799  // Link a fallback implementation of device libraries if they are not
800  // supported by a device compiler.
801  // Pre-compiled programs (after AOT compilation or read from persitent
802  // cache) are supposed to be already linked.
803  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
804  // no fallback device library will be linked.
805  uint32_t DeviceLibReqMask = 0;
806  if (!DeviceCodeWasInCache &&
809  DeviceLibReqMask = getDeviceLibReqMask(Img);
810 
811  std::vector<ur_program_handle_t> ProgramsToLink;
812  // If we had a program in cache, then it should have been the fully linked
813  // program already.
814  if (!DeviceCodeWasInCache) {
815  for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) {
816  device_image_plain DevImagePlain =
817  getDeviceImageFromBinaryImage(BinImg, Context, Device);
818  const std::shared_ptr<detail::device_image_impl> &DeviceImageImpl =
819  detail::getSyclObjImpl(DevImagePlain);
820 
821  SerializedObj ImgSpecConsts =
822  DeviceImageImpl->get_spec_const_blob_ref();
823 
824  ur_program_handle_t NativePrg =
825  createURProgram(*BinImg, Context, Device);
826 
827  if (BinImg->supportsSpecConstants())
828  setSpecializationConstants(DeviceImageImpl, NativePrg, Plugin);
829 
830  ProgramsToLink.push_back(NativePrg);
831  }
832  }
833  ProgramPtr BuiltProgram =
834  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
835  getSyclObjImpl(Device).get()->getHandleRef(), DeviceLibReqMask,
836  ProgramsToLink);
837  // Those extra programs won't be used anymore, just the final linked result
838  for (ur_program_handle_t Prg : ProgramsToLink)
839  Plugin->call(urProgramRelease, Prg);
840 
841  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
842 
843  {
844  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
845  NativePrograms.insert({BuiltProgram.get(), &Img});
846  for (RTDeviceBinaryImage *LinkedImg : DeviceImagesToLink) {
847  NativePrograms.insert({BuiltProgram.get(), LinkedImg});
848  }
849  }
850 
851  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
852 
853  // Save program to persistent cache if it is not there
854  if (!DeviceCodeWasInCache) {
855  PersistentDeviceCodeCache::putItemToDisc(Device, AllImages, SpecConsts,
856  CompileOpts + LinkOpts,
857  BuiltProgram.get());
858  }
859  return BuiltProgram.release();
860  };
861 
862  uint32_t ImgId = Img.getImageID();
863  const ur_device_handle_t UrDevice = Dev->getHandleRef();
864  auto CacheKey =
865  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice);
866 
867  auto GetCachedBuildF = [&Cache, &CacheKey]() {
868  return Cache.getOrInsertProgram(CacheKey);
869  };
870 
872  return BuildF();
873 
874  auto BuildResult = Cache.getOrBuild<errc::build>(GetCachedBuildF, BuildF);
875  // getOrBuild is not supposed to return nullptr
876  assert(BuildResult != nullptr && "Invalid build result");
877 
878  ur_program_handle_t ResProgram = BuildResult->Val;
879  auto Plugin = ContextImpl->getPlugin();
880 
881  // If we linked any extra device images, then we need to
882  // cache them as well.
883  for (const RTDeviceBinaryImage *BImg : DeviceImagesToLink) {
884  // CacheKey is captured by reference by GetCachedBuildF, so we can simply
885  // update it here and re-use that lambda.
886  CacheKey.first.second = BImg->getImageID();
887  bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram);
888  if (DidInsert) {
889  // For every cached copy of the program, we need to increment its refcount
890  Plugin->call(urProgramRetain, ResProgram);
891  }
892  }
893 
894  // If caching is enabled, one copy of the program handle will be
895  // stored in the cache, and one handle is returned to the
896  // caller. In that case, we need to increase the ref count of the
897  // program.
898  ContextImpl->getPlugin()->call(urProgramRetain, ResProgram);
899  return ResProgram;
900 }
901 
902 // When caching is enabled, the returned UrProgram and UrKernel will
903 // already have their ref count incremented.
904 std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
905  ur_program_handle_t>
907  const DeviceImplPtr &DeviceImpl,
908  const std::string &KernelName,
909  const NDRDescT &NDRDesc) {
910  if constexpr (DbgProgMgr > 0) {
911  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
912  << ", " << DeviceImpl.get() << ", " << KernelName << ")\n";
913  }
914 
915  using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT;
916 
917  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
918 
919  std::string CompileOpts, LinkOpts;
920  SerializedObj SpecConsts;
921  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
922  // Should always come last!
925  ur_device_handle_t UrDevice = DeviceImpl->getHandleRef();
926 
927  auto key = std::make_tuple(std::move(SpecConsts), UrDevice,
928  CompileOpts + LinkOpts, KernelName);
930  auto ret_tuple = Cache.tryToGetKernelFast(key);
931  constexpr size_t Kernel = 0; // see KernelFastCacheValT tuple
932  constexpr size_t Program = 3; // see KernelFastCacheValT tuple
933  if (std::get<Kernel>(ret_tuple)) {
934  // Pulling a copy of a kernel and program from the cache,
935  // so we need to retain those resources.
936  ContextImpl->getPlugin()->call(urKernelRetain,
937  std::get<Kernel>(ret_tuple));
938  ContextImpl->getPlugin()->call(urProgramRetain,
939  std::get<Program>(ret_tuple));
940  return ret_tuple;
941  }
942  }
943 
944  ur_program_handle_t Program =
945  getBuiltURProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc);
946 
947  auto BuildF = [this, &Program, &KernelName, &ContextImpl] {
948  ur_kernel_handle_t Kernel = nullptr;
949 
950  const PluginPtr &Plugin = ContextImpl->getPlugin();
951  Plugin->call<errc::kernel_not_supported>(urKernelCreate, Program,
952  KernelName.c_str(), &Kernel);
953 
954  // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it.
955  if (ContextImpl->getPlatformImpl()->supports_usm()) {
956  // Some UR Plugins (like OpenCL) require this call to enable USM
957  // For others, UR will turn this into a NOP.
958  const ur_bool_t UrTrue = true;
959  Plugin->call(urKernelSetExecInfo, Kernel,
960  UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t),
961  nullptr, &UrTrue);
962  }
963 
964  const KernelArgMask *ArgMask = nullptr;
965  if (!m_UseSpvFile)
966  ArgMask = getEliminatedKernelArgMask(Program, KernelName);
967  return std::make_pair(Kernel, ArgMask);
968  };
969 
970  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
971  return Cache.getOrInsertKernel(Program, KernelName);
972  };
973 
975  // The built kernel cannot be shared between multiple
976  // threads when caching is disabled, so we can return
977  // nullptr for the mutex.
978  auto [Kernel, ArgMask] = BuildF();
979  return make_tuple(Kernel, nullptr, ArgMask, Program);
980  }
981 
982  auto BuildResult = Cache.getOrBuild<errc::invalid>(GetCachedBuildF, BuildF);
983  // getOrBuild is not supposed to return nullptr
984  assert(BuildResult != nullptr && "Invalid build result");
985  const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val;
986  auto ret_val = std::make_tuple(KernelArgMaskPair.first,
987  &(BuildResult->MBuildResultMutex),
988  KernelArgMaskPair.second, Program);
989  // If caching is enabled, one copy of the kernel handle will be
990  // stored in the cache, and one handle is returned to the
991  // caller. In that case, we need to increase the ref count of the
992  // kernel.
993  ContextImpl->getPlugin()->call(urKernelRetain, KernelArgMaskPair.first);
994  Cache.saveKernel(key, ret_val);
995  return ret_val;
996 }
997 
998 ur_program_handle_t
1000  const ContextImplPtr Context) {
1001  ur_program_handle_t Program;
1002  const PluginPtr &Plugin = Context->getPlugin();
1003  Plugin->call(urKernelGetInfo, Kernel, UR_KERNEL_INFO_PROGRAM,
1004  sizeof(ur_program_handle_t), &Program, nullptr);
1005  return Program;
1006 }
1007 
1008 std::string
1009 ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program,
1010  const ContextImplPtr Context) {
1011  size_t URDevicesSize = 0;
1012  const PluginPtr &Plugin = Context->getPlugin();
1013  Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES, 0, nullptr,
1014  &URDevicesSize);
1015  std::vector<ur_device_handle_t> URDevices(URDevicesSize /
1016  sizeof(ur_device_handle_t));
1017  Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES,
1018  URDevicesSize, URDevices.data(), nullptr);
1019  std::string Log = "The program was built for " +
1020  std::to_string(URDevices.size()) + " devices";
1021  for (ur_device_handle_t &Device : URDevices) {
1022  std::string DeviceBuildInfoString;
1023  size_t DeviceBuildInfoStrSize = 0;
1024  Plugin->call(urProgramGetBuildInfo, Program, Device,
1025  UR_PROGRAM_BUILD_INFO_LOG, 0, nullptr,
1026  &DeviceBuildInfoStrSize);
1027  if (DeviceBuildInfoStrSize > 0) {
1028  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
1029  Plugin->call(urProgramGetBuildInfo, Program, Device,
1030  UR_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
1031  DeviceBuildInfo.data(), nullptr);
1032  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
1033  }
1034 
1035  std::string DeviceNameString;
1036  size_t DeviceNameStrSize = 0;
1037  Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME, 0, nullptr,
1038  &DeviceNameStrSize);
1039  if (DeviceNameStrSize > 0) {
1040  std::vector<char> DeviceName(DeviceNameStrSize);
1041  Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME,
1042  DeviceNameStrSize, DeviceName.data(), nullptr);
1043  DeviceNameString = std::string(DeviceName.data());
1044  }
1045  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
1046  DeviceBuildInfoString;
1047  }
1048  return Log;
1049 }
1050 
1051 // TODO device libraries may use scpecialization constants, manifest files, etc.
1052 // To support that they need to be delivered in a different container - so that
1053 // sycl_device_binary_struct can be created for each of them.
1054 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
1055  ur_program_handle_t &Prog) {
1056  std::string LibSyclDir = OSUtil::getCurrentDSODir();
1057  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
1058  std::ifstream::in | std::ifstream::binary);
1059  if (!File.good()) {
1060  return false;
1061  }
1062 
1063  File.seekg(0, std::ios::end);
1064  size_t FileSize = File.tellg();
1065  File.seekg(0, std::ios::beg);
1066  std::vector<char> FileContent(FileSize);
1067  File.read(&FileContent[0], FileSize);
1068  File.close();
1069 
1070  Prog =
1071  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
1072  return Prog != nullptr;
1073 }
1074 
1075 // For each extension, a pair of library names. The first uses native support,
1076 // the second emulates functionality in software.
1077 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
1080  {nullptr, "libsycl-fallback-cassert.spv"}},
1082  {nullptr, "libsycl-fallback-cmath.spv"}},
1084  {nullptr, "libsycl-fallback-cmath-fp64.spv"}},
1086  {nullptr, "libsycl-fallback-complex.spv"}},
1088  {nullptr, "libsycl-fallback-complex-fp64.spv"}},
1090  {nullptr, "libsycl-fallback-cstring.spv"}},
1092  {nullptr, "libsycl-fallback-imf.spv"}},
1094  {nullptr, "libsycl-fallback-imf-fp64.spv"}},
1096  {nullptr, "libsycl-fallback-imf-bf16.spv"}},
1098  {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}};
1099 
1100 static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) {
1101  auto LibPair = DeviceLibNames.find(Extension);
1102  const char *Lib = nullptr;
1103  if (LibPair != DeviceLibNames.end())
1104  Lib = Native ? LibPair->second.first : LibPair->second.second;
1105  if (Lib == nullptr)
1107  "Unhandled (new?) device library extension");
1108  return Lib;
1109 }
1110 
1111 // For each extension understood by the SYCL runtime, the string representation
1112 // of its name. Names with devicelib in them are internal to the runtime. Others
1113 // are actual OpenCL extensions.
1114 static const std::map<DeviceLibExt, const char *> DeviceLibExtensionStrs = {
1115  {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"},
1116  {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"},
1118  "cl_intel_devicelib_math_fp64"},
1119  {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"},
1121  "cl_intel_devicelib_complex_fp64"},
1122  {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"},
1123  {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"},
1124  {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"},
1125  {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"},
1127  "cl_intel_bfloat16_conversions"}};
1128 
1129 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
1130  auto Ext = DeviceLibExtensionStrs.find(Extension);
1131  if (Ext == DeviceLibExtensionStrs.end())
1133  "Unhandled (new?) device library extension");
1134  return Ext->second;
1135 }
1136 
1137 static ur_result_t doCompile(const PluginPtr &Plugin,
1138  ur_program_handle_t Program, uint32_t NumDevs,
1139  ur_device_handle_t *Devs, ur_context_handle_t Ctx,
1140  const char *Opts) {
1141  // Try to compile with given devices, fall back to compiling with the program
1142  // context if unsupported by the adapter
1143  auto Result =
1144  Plugin->call_nocheck(urProgramCompileExp, Program, NumDevs, Devs, Opts);
1145  if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1146  return Plugin->call_nocheck(urProgramCompile, Ctx, Program, Opts);
1147  }
1148  return Result;
1149 }
1150 
1151 static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context,
1152  DeviceLibExt Extension,
1153  ur_device_handle_t Device,
1154  bool UseNativeLib) {
1155 
1156  auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib);
1157 
1158  auto LockedCache = Context->acquireCachedLibPrograms();
1159  auto CachedLibPrograms = LockedCache.get();
1160  auto CacheResult = CachedLibPrograms.emplace(
1161  std::make_pair(std::make_pair(Extension, Device), nullptr));
1162  bool Cached = !CacheResult.second;
1163  auto LibProgIt = CacheResult.first;
1164  ur_program_handle_t &LibProg = LibProgIt->second;
1165 
1166  if (Cached)
1167  return LibProg;
1168 
1169  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
1170  CachedLibPrograms.erase(LibProgIt);
1172  std::string("Failed to load ") + LibFileName);
1173  }
1174 
1175  const PluginPtr &Plugin = Context->getPlugin();
1176  // TODO no spec constants are used in the std libraries, support in the future
1177  // Do not use compile options for library programs: it is not clear if user
1178  // options (image options) are supposed to be applied to library program as
1179  // well, and what actually happens to a SPIR-V program if we apply them.
1180  ur_result_t Error =
1181  doCompile(Plugin, LibProg, 1, &Device, Context->getHandleRef(), "");
1182  if (Error != UR_RESULT_SUCCESS) {
1183  CachedLibPrograms.erase(LibProgIt);
1184  throw detail::set_ur_error(
1186  ProgramManager::getProgramBuildLog(LibProg, Context)),
1187  Error);
1188  }
1189 
1190  return LibProg;
1191 }
1192 
1193 ProgramManager::ProgramManager() : m_AsanFoundInImage(false) {
1194  const char *SpvFile = std::getenv(UseSpvEnv);
1195  // If a SPIR-V file is specified with an environment variable,
1196  // register the corresponding image
1197  if (SpvFile) {
1198  m_UseSpvFile = true;
1199  // The env var requests that the program is loaded from a SPIR-V file on
1200  // disk
1201  std::ifstream File(SpvFile, std::ios::binary);
1202 
1203  if (!File.is_open())
1205  std::string("Can't open file specified via ") +
1206  UseSpvEnv + ": " + SpvFile);
1207  File.seekg(0, std::ios::end);
1208  size_t Size = File.tellg();
1209  std::unique_ptr<char[]> Data(new char[Size]);
1210  File.seekg(0);
1211  File.read(Data.get(), Size);
1212  File.close();
1213  if (!File.good())
1215  std::string("read from ") + SpvFile +
1216  std::string(" failed"));
1217  // No need for a mutex here since all access to these private fields is
1218  // blocked until the construction of the ProgramManager singleton is
1219  // finished.
1220  m_SpvFileImage =
1221  std::make_unique<DynRTDeviceBinaryImage>(std::move(Data), Size);
1222 
1223  if constexpr (DbgProgMgr > 0) {
1224  std::cerr << "loaded device image binary from " << SpvFile << "\n";
1225  std::cerr << "format: " << getFormatStr(m_SpvFileImage->getFormat())
1226  << "\n";
1227  }
1228  }
1229 }
1230 
1232  bool JITCompilationIsRequired) {
1233  if (!JITCompilationIsRequired)
1234  return;
1235  // If the image is already compiled with AOT, throw an exception.
1236  const sycl_device_binary_struct &RawImg = Image->getRawData();
1237  if ((strcmp(RawImg.DeviceTargetSpec,
1239  (strcmp(RawImg.DeviceTargetSpec,
1241  (strcmp(RawImg.DeviceTargetSpec,
1243  throw sycl::exception(sycl::errc::feature_not_supported,
1244  "Recompiling AOT image is not supported");
1245  }
1246 }
1247 
1248 template <typename StorageKey>
1250  const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
1251  const StorageKey &Key, const context &Context, const device &Device) {
1252  auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
1253  if (ItBegin == ItEnd)
1254  return nullptr;
1255 
1256  std::vector<sycl_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
1257  auto It = ItBegin;
1258  for (unsigned I = 0; It != ItEnd; ++It, ++I)
1259  RawImgs[I] = reinterpret_cast<sycl_device_binary>(
1260  const_cast<sycl_device_binary>(&It->second->getRawData()));
1261 
1262  std::vector<ur_device_binary_t> UrBinaries(RawImgs.size());
1263  for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) {
1264  UrBinaries[BinaryCount].pDeviceTargetSpec =
1265  getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec);
1266  }
1267 
1268  uint32_t ImgInd = 0;
1269  // Ask the native runtime under the given context to choose the device image
1270  // it prefers.
1271  getSyclObjImpl(Context)->getPlugin()->call(
1272  urDeviceSelectBinary, getSyclObjImpl(Device)->getHandleRef(),
1273  UrBinaries.data(), UrBinaries.size(), &ImgInd);
1274  std::advance(ItBegin, ImgInd);
1275  return ItBegin->second;
1276 }
1277 
1278 RTDeviceBinaryImage &
1279 ProgramManager::getDeviceImage(const std::string &KernelName,
1280  const context &Context, const device &Device,
1281  bool JITCompilationIsRequired) {
1282  if constexpr (DbgProgMgr > 0) {
1283  std::cerr << ">>> ProgramManager::getDeviceImage(\"" << KernelName << "\", "
1284  << getSyclObjImpl(Context).get() << ", "
1285  << getSyclObjImpl(Device).get() << ", "
1286  << JITCompilationIsRequired << ")\n";
1287 
1288  std::cerr << "available device images:\n";
1290  }
1291 
1292  if (m_UseSpvFile) {
1293  assert(m_SpvFileImage);
1294  return getDeviceImage(
1295  std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1296  Context, Device, JITCompilationIsRequired);
1297  }
1298 
1299  RTDeviceBinaryImage *Img = nullptr;
1300  {
1301  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1302  if (auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1303  KernelId != m_KernelName2KernelIDs.end()) {
1304  // Kernel ID presence guarantees that we have bin image in the storage.
1305  Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, KernelId->second,
1306  Context, Device);
1307  assert(Img && "No binary image found for kernel id");
1308  } else {
1309  Img = getBinImageFromMultiMap(m_ServiceKernels, KernelName, Context,
1310  Device);
1311  }
1312  }
1313  if (Img) {
1314  CheckJITCompilationForImage(Img, JITCompilationIsRequired);
1315 
1316  if constexpr (DbgProgMgr > 0) {
1317  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
1318  Img->print();
1319  }
1320  return *Img;
1321  }
1322 
1324  "No kernel named " + KernelName + " was found");
1325 }
1326 
1328  const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1329  const context &Context, const device &Device,
1330  bool JITCompilationIsRequired) {
1331  assert(ImageSet.size() > 0);
1332 
1333  if constexpr (DbgProgMgr > 0) {
1334  std::cerr << ">>> ProgramManager::getDeviceImage(Custom SPV file "
1335  << getSyclObjImpl(Context).get() << ", "
1336  << getSyclObjImpl(Device).get() << ", "
1337  << JITCompilationIsRequired << ")\n";
1338 
1339  std::cerr << "available device images:\n";
1341  }
1342 
1343  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1344  std::vector<sycl_device_binary> RawImgs(ImageSet.size());
1345  auto ImageIterator = ImageSet.begin();
1346  for (size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1347  RawImgs[i] = reinterpret_cast<sycl_device_binary>(
1348  const_cast<sycl_device_binary>(&(*ImageIterator)->getRawData()));
1349  uint32_t ImgInd = 0;
1350  // Ask the native runtime under the given context to choose the device image
1351  // it prefers.
1352 
1353  std::vector<ur_device_binary_t> UrBinaries(RawImgs.size());
1354  for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) {
1355  UrBinaries[BinaryCount].pDeviceTargetSpec =
1356  getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec);
1357  }
1358 
1359  getSyclObjImpl(Context)->getPlugin()->call(
1360  urDeviceSelectBinary, getSyclObjImpl(Device)->getHandleRef(),
1361  UrBinaries.data(), UrBinaries.size(), &ImgInd);
1362 
1363  ImageIterator = ImageSet.begin();
1364  std::advance(ImageIterator, ImgInd);
1365 
1366  CheckJITCompilationForImage(*ImageIterator, JITCompilationIsRequired);
1367 
1368  if constexpr (DbgProgMgr > 0) {
1369  std::cerr << "selected device image: " << &(*ImageIterator)->getRawData()
1370  << "\n";
1371  (*ImageIterator)->print();
1372  }
1373  return **ImageIterator;
1374 }
1375 
1376 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
1377  uint32_t Mask =
1378  0x1 << (static_cast<uint32_t>(Ext) -
1379  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
1380  return ((DeviceLibReqMask & Mask) == Mask);
1381 }
1382 
1383 static std::vector<ur_program_handle_t>
1385  const ur_device_handle_t &Device,
1386  uint32_t DeviceLibReqMask) {
1387  std::vector<ur_program_handle_t> Programs;
1388 
1389  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1391  /* is fallback loaded? */ false},
1401 
1402  // Disable all devicelib extensions requiring fp64 support if at least
1403  // one underlying device doesn't support cl_khr_fp64.
1404  std::string DevExtList =
1405  Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1407  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
1408 
1409  // Load a fallback library for an extension if the device does not
1410  // support it.
1411  for (auto &Pair : RequiredDeviceLibExt) {
1412  DeviceLibExt Ext = Pair.first;
1413  bool &FallbackIsLoaded = Pair.second;
1414 
1415  if (FallbackIsLoaded) {
1416  continue;
1417  }
1418 
1419  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
1420  continue;
1421  }
1422 
1426  !fp64Support) {
1427  continue;
1428  }
1429 
1430  auto ExtName = getDeviceLibExtensionStr(Ext);
1431 
1432  bool InhibitNativeImpl = false;
1433  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1434  InhibitNativeImpl = strstr(Env, ExtName) != nullptr;
1435  }
1436 
1437  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1438  if (!DeviceSupports || InhibitNativeImpl) {
1439  Programs.push_back(
1440  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/false));
1441  FallbackIsLoaded = true;
1442  } else {
1443  // bfloat16 needs native library if device supports it
1445  Programs.push_back(
1446  loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/true));
1447  FallbackIsLoaded = true;
1448  }
1449  }
1450  }
1451  return Programs;
1452 }
1453 
1454 ProgramManager::ProgramPtr ProgramManager::build(
1455  ProgramPtr Program, const ContextImplPtr Context,
1456  const std::string &CompileOptions, const std::string &LinkOptions,
1457  ur_device_handle_t Device, uint32_t DeviceLibReqMask,
1458  const std::vector<ur_program_handle_t> &ExtraProgramsToLink) {
1459 
1460  if constexpr (DbgProgMgr > 0) {
1461  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
1462  << CompileOptions << ", " << LinkOptions << ", ... " << Device
1463  << ")\n";
1464  }
1465 
1466  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1467 
1468  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
1469  // We do not link with other device libraries, because it may fail
1470  // due to unrecognized SPIR-V format of those libraries.
1471  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
1472  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
1473  LinkDeviceLibs = false;
1474 
1475  std::vector<ur_program_handle_t> LinkPrograms;
1476  if (LinkDeviceLibs) {
1477  LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask);
1478  }
1479 
1480  static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK");
1481  static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1');
1482 
1483  const PluginPtr &Plugin = Context->getPlugin();
1484  if (LinkPrograms.empty() && ExtraProgramsToLink.empty() && !ForceLink) {
1485  const std::string &Options = LinkOptions.empty()
1486  ? CompileOptions
1487  : (CompileOptions + " " + LinkOptions);
1488  ur_result_t Error =
1489  Plugin->call_nocheck(urProgramBuildExp, Program.get(),
1490  /*num devices =*/1, &Device, Options.c_str());
1491  if (Error == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1492  Error = Plugin->call_nocheck(urProgramBuild, Context->getHandleRef(),
1493  Program.get(), Options.c_str());
1494  }
1495 
1496  if (Error != UR_RESULT_SUCCESS)
1497  throw detail::set_ur_error(
1498  exception(make_error_code(errc::build),
1499  getProgramBuildLog(Program.get(), Context)),
1500  Error);
1501 
1502  return Program;
1503  }
1504 
1505  // Include the main program and compile/link everything together
1506  auto Res = doCompile(Plugin, Program.get(), /*num devices =*/1, &Device,
1507  Context->getHandleRef(), CompileOptions.c_str());
1508  Plugin->checkUrResult<errc::build>(Res);
1509  LinkPrograms.push_back(Program.get());
1510 
1511  for (ur_program_handle_t Prg : ExtraProgramsToLink) {
1512  auto Result =
1513  Plugin->call_nocheck(urProgramCompileExp, Prg, /* num devices =*/1,
1514  &Device, CompileOptions.c_str());
1515  if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1516  Plugin->call(urProgramCompile, Context->getHandleRef(), Prg,
1517  CompileOptions.c_str());
1518  }
1519  Plugin->checkUrResult(Result);
1520 
1521  LinkPrograms.push_back(Prg);
1522  }
1523 
1524  ur_program_handle_t LinkedProg = nullptr;
1525  auto doLink = [&] {
1526  auto Res = Plugin->call_nocheck(urProgramLinkExp, Context->getHandleRef(),
1527  /*num devices =*/1, &Device,
1528  LinkPrograms.size(), LinkPrograms.data(),
1529  LinkOptions.c_str(), &LinkedProg);
1530  if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1531  Res = Plugin->call_nocheck(urProgramLink, Context->getHandleRef(),
1532  LinkPrograms.size(), LinkPrograms.data(),
1533  LinkOptions.c_str(), &LinkedProg);
1534  }
1535  return Res;
1536  };
1537  ur_result_t Error = doLink();
1538  if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES ||
1539  Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
1540  Context->getKernelProgramCache().reset();
1541  Error = doLink();
1542  }
1543 
1544  // Link program call returns a new program object if all parameters are valid,
1545  // or NULL otherwise. Release the original (user) program.
1546  Program.reset(LinkedProg);
1547  if (Error != UR_RESULT_SUCCESS) {
1548  if (LinkedProg) {
1549  // A non-trivial error occurred during linkage: get a build log, release
1550  // an incomplete (but valid) LinkedProg, and throw.
1551  throw detail::set_ur_error(
1552  exception(make_error_code(errc::build),
1553  getProgramBuildLog(LinkedProg, Context)),
1554  Error);
1555  }
1556  Plugin->checkUrResult(Error);
1557  }
1558  return Program;
1559 }
1560 
1561 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1562  const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1563  Img.getAssertUsed();
1564  if (AssertUsedRange.isAvailable())
1565  for (const auto &Prop : AssertUsedRange)
1566  m_KernelUsesAssert.insert(Prop->Name);
1567 }
1568 
1569 bool ProgramManager::kernelUsesAssert(const std::string &KernelName) const {
1570  return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1571 }
1572 
1574  const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1575  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1576  sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1577  const sycl_offload_entry EntriesB = RawImg->EntriesBegin;
1578  const sycl_offload_entry EntriesE = RawImg->EntriesEnd;
1579  // Treat the image as empty one
1580  if (EntriesB == EntriesE)
1581  continue;
1582 
1583  auto Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
1584  static uint32_t SequenceID = 0;
1585 
1586  // Fill the kernel argument mask map
1587  const RTDeviceBinaryImage::PropertyRange &KPOIRange =
1588  Img->getKernelParamOptInfo();
1589  if (KPOIRange.isAvailable()) {
1590  KernelNameToArgMaskMap &ArgMaskMap =
1591  m_EliminatedKernelArgMasks[Img.get()];
1592  for (const auto &Info : KPOIRange)
1593  ArgMaskMap[Info->Name] =
1594  createKernelArgMask(DeviceBinaryProperty(Info).asByteArray());
1595  }
1596 
1597  // Fill maps for kernel bundles
1598  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1599 
1600  // Register all exported symbols
1601  for (const sycl_device_binary_property &ESProp :
1602  Img->getExportedSymbols()) {
1603  m_ExportedSymbolImages.insert({ESProp->Name, Img.get()});
1604  }
1605 
1606  // Record mapping between virtual function sets and device images
1607  for (const sycl_device_binary_property &VFProp :
1608  Img->getVirtualFunctions()) {
1609  std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
1610  for (const auto &SetName : detail::split_string(StrValue, ','))
1611  m_VFSet2BinImage[SetName].insert(Img.get());
1612  }
1613 
1614  if (DumpImages) {
1615  const bool NeedsSequenceID = std::any_of(
1616  m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1617  [&](auto &CurrentImg) {
1618  return CurrentImg.first->getFormat() == Img->getFormat();
1619  });
1620  dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1621  }
1622 
1623  m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
1624 
1625  for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1626  ++EntriesIt) {
1627 
1628  // Skip creating unique kernel ID if it is a service kernel.
1629  // SYCL service kernels are identified by having
1630  // __sycl_service_kernel__ in the mangled name, primarily as part of
1631  // the namespace of the name type.
1632  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1633  m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1634  continue;
1635  }
1636 
1637  // Skip creating unique kernel ID if it is an exported device
1638  // function. Exported device functions appear in the offload entries
1639  // among kernels, but are identifiable by being listed in properties.
1640  if (m_ExportedSymbolImages.find(EntriesIt->name) !=
1641  m_ExportedSymbolImages.end())
1642  continue;
1643 
1644  // ... and create a unique kernel ID for the entry
1645  auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1646  if (It == m_KernelName2KernelIDs.end()) {
1647  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1648  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1649  sycl::kernel_id KernelID =
1650  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1651 
1652  It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1653  }
1654  m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1655  m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1656  }
1657 
1658  cacheKernelUsesAssertInfo(*Img);
1659 
1660  // check if kernel uses asan
1661  {
1662  sycl_device_binary_property Prop = Img->getProperty("asanUsed");
1663  m_AsanFoundInImage |=
1664  Prop && (detail::DeviceBinaryProperty(Prop).asUint32() != 0);
1665  }
1666 
1667  // Sort kernel ids for faster search
1668  std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1669  m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
1670 
1671  // ... and initialize associated device_global information
1672  {
1673  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1674 
1675  auto DeviceGlobals = Img->getDeviceGlobals();
1676  for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) {
1677  ByteArray DeviceGlobalInfo =
1678  DeviceBinaryProperty(DeviceGlobal).asByteArray();
1679 
1680  // The supplied device_global info property is expected to contain:
1681  // * 8 bytes - Size of the property.
1682  // * 4 bytes - Size of the underlying type in the device_global.
1683  // * 4 bytes - 0 if device_global has device_image_scope and any value
1684  // otherwise.
1685  DeviceGlobalInfo.dropBytes(8);
1686  auto [TypeSize, DeviceImageScopeDecorated] =
1687  DeviceGlobalInfo.consume<std::uint32_t, std::uint32_t>();
1688  assert(DeviceGlobalInfo.empty() && "Extra data left!");
1689 
1690  // Give the image pointer as an identifier for the image the
1691  // device-global is associated with.
1692 
1693  auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1694  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1695  // If it has already been registered we update the information.
1696  ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1697  DeviceImageScopeDecorated);
1698  } else {
1699  // If it has not already been registered we create a new entry.
1700  // Note: Pointer to the device global is not available here, so it
1701  // cannot be set until registration happens.
1702  auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1703  DeviceGlobal->Name, Img.get(), TypeSize,
1704  DeviceImageScopeDecorated);
1705  m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1706  }
1707  }
1708  }
1709  // ... and initialize associated host_pipe information
1710  {
1711  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1712  auto HostPipes = Img->getHostPipes();
1713  for (const sycl_device_binary_property &HostPipe : HostPipes) {
1714  ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray();
1715 
1716  // The supplied host_pipe info property is expected to contain:
1717  // * 8 bytes - Size of the property.
1718  // * 4 bytes - Size of the underlying type in the host_pipe.
1719  // Note: Property may be padded.
1720 
1721  HostPipeInfo.dropBytes(8);
1722  auto TypeSize = HostPipeInfo.consume<std::uint32_t>();
1723  assert(HostPipeInfo.empty() && "Extra data left!");
1724 
1725  auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1726  if (ExistingHostPipe != m_HostPipes.end()) {
1727  // If it has already been registered we update the information.
1728  ExistingHostPipe->second->initialize(TypeSize);
1729  ExistingHostPipe->second->initialize(Img.get());
1730  } else {
1731  // If it has not already been registered we create a new entry.
1732  // Note: Pointer to the host pipe is not available here, so it
1733  // cannot be set until registration happens.
1734  auto EntryUPtr =
1735  std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1736  EntryUPtr->initialize(Img.get());
1737  m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1738  }
1739  }
1740  }
1741  m_DeviceImages.insert(std::move(Img));
1742  }
1743 }
1744 
1746  for (const auto &ImgIt : m_BinImg2KernelIDs) {
1747  ImgIt.first->print();
1748  }
1749 }
1750 
1751 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img,
1752  uint32_t SequenceID) const {
1753  const char *Prefix = std::getenv("SYCL_DUMP_IMAGES_PREFIX");
1754  std::string Fname(Prefix ? Prefix : "sycl_");
1755  const sycl_device_binary_struct &RawImg = Img.getRawData();
1756  Fname += RawImg.DeviceTargetSpec;
1757  if (SequenceID)
1758  Fname += '_' + std::to_string(SequenceID);
1759  std::string Ext;
1760 
1761  ur::DeviceBinaryType Format = Img.getFormat();
1762  if (Format == SYCL_DEVICE_BINARY_TYPE_SPIRV)
1763  Ext = ".spv";
1764  else if (Format == SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1765  Ext = ".bc";
1766  else
1767  Ext = ".bin";
1768  Fname += Ext;
1769 
1770  std::ofstream F(Fname, std::ios::binary);
1771 
1772  if (!F.is_open()) {
1773  throw exception(make_error_code(errc::runtime), "Can not write " + Fname);
1774  }
1775  Img.dump(F);
1776  F.close();
1777 }
1778 
1780  const RTDeviceBinaryImage::PropertyRange &DLMRange =
1781  Img.getDeviceLibReqMask();
1782  if (DLMRange.isAvailable())
1783  return DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1784  else
1785  return 0x0;
1786 }
1787 
1788 const KernelArgMask *
1789 ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg,
1790  const std::string &KernelName) {
1791  // Bail out if there are no eliminated kernel arg masks in our images
1792  if (m_EliminatedKernelArgMasks.empty())
1793  return nullptr;
1794 
1795  {
1796  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1797  auto Range = NativePrograms.equal_range(NativePrg);
1798  for (auto ImgIt = Range.first; ImgIt != Range.second; ++ImgIt) {
1799  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1800  if (MapIt == m_EliminatedKernelArgMasks.end())
1801  continue;
1802  auto ArgMaskMapIt = MapIt->second.find(KernelName);
1803  if (ArgMaskMapIt != MapIt->second.end())
1804  return &MapIt->second[KernelName];
1805  }
1806  if (Range.first != Range.second)
1807  return nullptr;
1808  }
1809 
1810  // If the program was not cached iterate over all available images looking for
1811  // the requested kernel
1812  for (auto &Elem : m_EliminatedKernelArgMasks) {
1813  auto ArgMask = Elem.second.find(KernelName);
1814  if (ArgMask != Elem.second.end())
1815  return &ArgMask->second;
1816  }
1817 
1818  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1819  return nullptr;
1820 }
1821 
1823  auto IsAOTBinary = [](const char *Format) {
1824  return ((strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1825  (strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1826  (strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1827  };
1828 
1829  // There are only two initial states so far - SPIRV which needs to be compiled
1830  // and linked and fully compiled(AOTed) binary
1831 
1832  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1833 
1834  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1835 }
1836 
1837 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1838  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1839 
1840  auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1841  if (KernelID == m_KernelName2KernelIDs.end())
1843  "No kernel found with the specified name");
1844 
1845  return KernelID->second;
1846 }
1847 
1849  std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1850 
1851  return std::any_of(
1852  m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1853  [&](std::pair<RTDeviceBinaryImage *,
1854  std::shared_ptr<std::vector<kernel_id>>>
1855  Elem) { return compatibleWithDevice(Elem.first, Dev); });
1856 }
1857 
1858 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1859  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1860 
1861  std::vector<sycl::kernel_id> AllKernelIDs;
1862  AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1863  for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1864  AllKernelIDs.push_back(KernelID.second);
1865  }
1866  return AllKernelIDs;
1867 }
1868 
1869 kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
1870  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1871 
1872  auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1873  if (KernelID == m_BuiltInKernelIDs.end()) {
1874  auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1875  auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1876  KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1877  }
1878 
1879  return KernelID->second;
1880 }
1881 
1882 void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
1883  const char *UniqueId) {
1884  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1885 
1886  auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1887  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1888  // Update the existing information and add the entry to the pointer map.
1889  ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1890  m_Ptr2DeviceGlobal.insert(
1891  {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1892  return;
1893  }
1894 
1895  auto EntryUPtr =
1896  std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1897  auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1898  m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1899 }
1900 
1901 std::set<RTDeviceBinaryImage *>
1902 ProgramManager::getRawDeviceImages(const std::vector<kernel_id> &KernelIDs) {
1903  std::set<RTDeviceBinaryImage *> BinImages;
1904  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1905  for (const kernel_id &KID : KernelIDs) {
1906  auto Range = m_KernelIDs2BinImage.equal_range(KID);
1907  for (auto It = Range.first, End = Range.second; It != End; ++It)
1908  BinImages.insert(It->second);
1909  }
1910  return BinImages;
1911 }
1912 
1914 ProgramManager::getDeviceGlobalEntry(const void *DeviceGlobalPtr) {
1915  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1916  auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1917  assert(Entry != m_Ptr2DeviceGlobal.end() && "Device global entry not found");
1918  return Entry->second;
1919 }
1920 
1921 std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
1922  const std::vector<std::string> &UniqueIds,
1923  bool ExcludeDeviceImageScopeDecorated) {
1924  std::vector<DeviceGlobalMapEntry *> FoundEntries;
1925  FoundEntries.reserve(UniqueIds.size());
1926 
1927  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1928  for (const std::string &UniqueId : UniqueIds) {
1929  auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1930  assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1931  "Device global not found in map.");
1932  if (!ExcludeDeviceImageScopeDecorated ||
1933  !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1934  FoundEntries.push_back(DeviceGlobalEntry->second.get());
1935  }
1936  return FoundEntries;
1937 }
1938 
1939 void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr,
1940  const char *UniqueId) {
1941  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1942 
1943  auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1944  if (ExistingHostPipe != m_HostPipes.end()) {
1945  ExistingHostPipe->second->initialize(HostPipePtr);
1946  m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1947  return;
1948  }
1949 
1950  auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1951  auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1952  m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1953 }
1954 
1956 ProgramManager::getHostPipeEntry(const std::string &UniqueId) {
1957  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1958  auto Entry = m_HostPipes.find(UniqueId);
1959  assert(Entry != m_HostPipes.end() && "Host pipe entry not found");
1960  return Entry->second.get();
1961 }
1962 
1964  std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1965  auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1966  assert(Entry != m_Ptr2HostPipe.end() && "Host pipe entry not found");
1967  return Entry->second;
1968 }
1969 
1971  RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) {
1972  const bundle_state ImgState = getBinImageState(BinImage);
1973 
1974  assert(compatibleWithDevice(BinImage, Dev));
1975 
1976  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1977  // Collect kernel names for the image.
1978  {
1979  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1980  KernelIDs = m_BinImg2KernelIDs[BinImage];
1981  }
1982 
1983  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1984  BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1985  /*PIProgram=*/nullptr);
1986 
1987  return createSyclObjFromImpl<device_image_plain>(Impl);
1988 }
1989 
1990 std::vector<device_image_plain>
1992  const context &Ctx, const std::vector<device> &Devs,
1993  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {
1994 
1995  // Collect unique raw device images taking into account kernel ids passed
1996  // TODO: Can we avoid repacking?
1997  std::set<RTDeviceBinaryImage *> BinImages;
1998  if (!KernelIDs.empty()) {
1999  for (const auto &KID : KernelIDs) {
2000  bool isCompatibleWithAtLeastOneDev =
2001  std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) {
2002  return sycl::is_compatible({KID}, Dev);
2003  });
2004  if (!isCompatibleWithAtLeastOneDev)
2005  throw sycl::exception(
2007  "Kernel is incompatible with all devices in devs");
2008  }
2009  BinImages = getRawDeviceImages(KernelIDs);
2010  } else {
2011  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2012  for (auto &ImageUPtr : m_BinImg2KernelIDs) {
2013  BinImages.insert(ImageUPtr.first);
2014  }
2015  }
2016 
2017  // Ignore images with incompatible state. Image is considered compatible
2018  // with a target state if an image is already in the target state or can
2019  // be brought to target state by compiling/linking/building.
2020  //
2021  // Example: an image in "executable" state is not compatible with
2022  // "input" target state - there is no operation to convert the image it
2023  // to "input" state. An image in "input" state is compatible with
2024  // "executable" target state because it can be built to get into
2025  // "executable" state.
2026  for (auto It = BinImages.begin(); It != BinImages.end();) {
2027  if (getBinImageState(*It) > TargetState)
2028  It = BinImages.erase(It);
2029  else
2030  ++It;
2031  }
2032 
2033  std::vector<device_image_plain> SYCLDeviceImages;
2034 
2035  // If a non-input state is requested, we can filter out some compatible
2036  // images and return only those with the highest compatible state for each
2037  // device-kernel pair. This map tracks how many kernel-device pairs need each
2038  // image, so that any unneeded ones are skipped.
2039  // TODO this has no effect if the requested state is input, consider having
2040  // a separate branch for that case to avoid unnecessary tracking work.
2041  struct DeviceBinaryImageInfo {
2042  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
2044  int RequirementCounter = 0;
2045  };
2046  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
2047 
2048  for (const sycl::device &Dev : Devs) {
2049  // Track the highest image state for each requested kernel.
2050  using StateImagesPairT =
2051  std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
2052  using KernelImageMapT =
2053  std::map<kernel_id, StateImagesPairT, LessByNameComp>;
2054  KernelImageMapT KernelImageMap;
2055  if (!KernelIDs.empty())
2056  for (const kernel_id &KernelID : KernelIDs)
2057  KernelImageMap.insert({KernelID, {}});
2058 
2059  for (RTDeviceBinaryImage *BinImage : BinImages) {
2060  if (!compatibleWithDevice(BinImage, Dev) ||
2061  !doesDevSupportDeviceRequirements(Dev, *BinImage))
2062  continue;
2063 
2064  auto InsertRes = ImageInfoMap.insert({BinImage, {}});
2065  DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
2066  if (InsertRes.second) {
2067  ImgInfo.State = getBinImageState(BinImage);
2068  // Collect kernel names for the image
2069  {
2070  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2071  ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
2072  }
2073  }
2074  const bundle_state ImgState = ImgInfo.State;
2075  const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
2076  ImgInfo.KernelIDs;
2077  int &ImgRequirementCounter = ImgInfo.RequirementCounter;
2078 
2079  // If the image does not contain any non-service kernels we can skip it.
2080  if (!ImageKernelIDs || ImageKernelIDs->empty())
2081  continue;
2082 
2083  // Update tracked information.
2084  for (kernel_id &KernelID : *ImageKernelIDs) {
2085  StateImagesPairT *StateImagesPair;
2086  // If only specific kernels are requested, ignore the rest.
2087  if (!KernelIDs.empty()) {
2088  auto It = KernelImageMap.find(KernelID);
2089  if (It == KernelImageMap.end())
2090  continue;
2091  StateImagesPair = &It->second;
2092  } else
2093  StateImagesPair = &KernelImageMap[KernelID];
2094 
2095  auto &[KernelImagesState, KernelImages] = *StateImagesPair;
2096 
2097  if (KernelImages.empty()) {
2098  KernelImagesState = ImgState;
2099  KernelImages.push_back(BinImage);
2100  ++ImgRequirementCounter;
2101  } else if (KernelImagesState < ImgState) {
2102  for (RTDeviceBinaryImage *Img : KernelImages) {
2103  auto It = ImageInfoMap.find(Img);
2104  assert(It != ImageInfoMap.end());
2105  assert(It->second.RequirementCounter > 0);
2106  --(It->second.RequirementCounter);
2107  }
2108  KernelImages.clear();
2109  KernelImages.push_back(BinImage);
2110  KernelImagesState = ImgState;
2111  ++ImgRequirementCounter;
2112  } else if (KernelImagesState == ImgState) {
2113  KernelImages.push_back(BinImage);
2114  ++ImgRequirementCounter;
2115  }
2116  }
2117  }
2118  }
2119 
2120  for (const auto &ImgInfoPair : ImageInfoMap) {
2121  if (ImgInfoPair.second.RequirementCounter == 0)
2122  continue;
2123 
2124  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
2125  ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
2126  ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
2127 
2128  SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
2129  }
2130 
2131  return SYCLDeviceImages;
2132 }
2133 
2134 void ProgramManager::bringSYCLDeviceImagesToState(
2135  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
2136 
2137  for (device_image_plain &DevImage : DeviceImages) {
2138  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
2139 
2140  // At this time, there is no circumstance where a device image should ever
2141  // be in the source state. That not good.
2142  assert(DevImageState != bundle_state::ext_oneapi_source);
2143 
2144  switch (TargetState) {
2146  // This case added for switch statement completion. We should not be here.
2147  assert(DevImageState == bundle_state::ext_oneapi_source);
2148  break;
2149  case bundle_state::input:
2150  // Do nothing since there is no state which can be upgraded to the input.
2151  assert(DevImageState == bundle_state::input);
2152  break;
2153  case bundle_state::object:
2154  if (DevImageState == bundle_state::input) {
2155  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
2156  /*PropList=*/{});
2157  break;
2158  }
2159  // Device image is expected to be object state then.
2160  assert(DevImageState == bundle_state::object);
2161  break;
2162  case bundle_state::executable: {
2163  switch (DevImageState) {
2165  // This case added for switch statement completion.
2166  // We should not be here.
2167  assert(DevImageState != bundle_state::ext_oneapi_source);
2168  break;
2169  case bundle_state::input:
2170  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
2171  /*PropList=*/{});
2172  break;
2173  case bundle_state::object: {
2174  std::vector<device_image_plain> LinkedDevImages =
2175  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
2176  /*PropList=*/{});
2177  // Since only one device image is passed here one output device image is
2178  // expected
2179  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
2180  DevImage = LinkedDevImages[0];
2181  break;
2182  }
2184  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
2185  /*PropList=*/{});
2186  break;
2187  }
2188  break;
2189  }
2190  }
2191  }
2192 }
2193 
2194 std::vector<device_image_plain>
2195 ProgramManager::getSYCLDeviceImages(const context &Ctx,
2196  const std::vector<device> &Devs,
2197  bundle_state TargetState) {
2198  // Collect device images with compatible state
2199  std::vector<device_image_plain> DeviceImages =
2200  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
2201  // Bring device images with compatible state to desired state.
2202  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
2203  return DeviceImages;
2204 }
2205 
2206 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
2207  const context &Ctx, const std::vector<device> &Devs,
2208  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
2209  // Collect device images with compatible state
2210  std::vector<device_image_plain> DeviceImages =
2211  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
2212 
2213  // Filter out images that are rejected by Selector
2214  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
2215  [&Selector](const device_image_plain &Image) {
2216  return !Selector(getSyclObjImpl(Image));
2217  });
2218  DeviceImages.erase(It, DeviceImages.end());
2219 
2220  // The spec says that the function should not call online compiler or linker
2221  // to translate device images into target state
2222  return DeviceImages;
2223 }
2224 
2225 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
2226  const context &Ctx, const std::vector<device> &Devs,
2227  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
2228  // Fast path for when no kernel IDs are requested
2229  if (KernelIDs.empty())
2230  return {};
2231 
2232  {
2233  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
2234 
2235  for (auto &It : m_BuiltInKernelIDs) {
2236  if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
2237  KernelIDs.end())
2239  "Attempting to use a built-in kernel. They are "
2240  "not fully supported");
2241  }
2242  }
2243 
2244  // Collect device images with compatible state
2245  std::vector<device_image_plain> DeviceImages =
2246  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
2247 
2248  // Bring device images with compatible state to desired state.
2249  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
2250  return DeviceImages;
2251 }
2252 
2255  const std::vector<device> &Devs,
2256  const property_list &) {
2257 
2258  // TODO: Extract compile options from property list once the Spec clarifies
2259  // how they can be passed.
2260 
2261  // TODO: Probably we could have cached compiled device images.
2262  const std::shared_ptr<device_image_impl> &InputImpl =
2263  getSyclObjImpl(DeviceImage);
2264 
2265  const PluginPtr &Plugin =
2266  getSyclObjImpl(InputImpl->get_context())->getPlugin();
2267 
2268  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2269  if (InputImpl->get_bin_image_ref()->getFormat() !=
2271  Devs.size() > 1)
2272  // FIXME: It was probably intended to be thrown, but a unittest starts
2273  // failing if we do so, investigate independently of switching to SYCL 2020
2274  // `exception`.
2276  "Creating a program from AOT binary for multiple device is "
2277  "not supported");
2278 
2279  // Device is not used when creating program from SPIRV, so passing only one
2280  // device is OK.
2281  ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(),
2282  InputImpl->get_context(), Devs[0]);
2283 
2284  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2285  setSpecializationConstants(InputImpl, Prog, Plugin);
2286 
2287  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
2288  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2289  bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
2290  InputImpl->get_spec_const_data_ref(),
2291  InputImpl->get_spec_const_blob_ref());
2292 
2293  std::vector<ur_device_handle_t> URDevices;
2294  URDevices.reserve(Devs.size());
2295  for (const device &Dev : Devs)
2296  URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2297 
2298  // TODO: Handle zero sized Device list.
2299  std::string CompileOptions;
2300  applyCompileOptionsFromEnvironment(CompileOptions);
2302  CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2303  // Should always come last!
2305  ur_result_t Error = doCompile(
2306  Plugin, ObjectImpl->get_ur_program_ref(), Devs.size(), URDevices.data(),
2307  getSyclObjImpl(InputImpl->get_context()).get()->getHandleRef(),
2308  CompileOptions.c_str());
2309  if (Error != UR_RESULT_SUCCESS)
2310  throw sycl::exception(
2312  getProgramBuildLog(ObjectImpl->get_ur_program_ref(),
2313  getSyclObjImpl(ObjectImpl->get_context())));
2314 
2315  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2316 }
2317 
2318 std::vector<device_image_plain>
2320  const std::vector<device> &Devs,
2321  const property_list &PropList) {
2322  (void)PropList;
2323 
2324  std::vector<ur_program_handle_t> URPrograms;
2325  URPrograms.push_back(getSyclObjImpl(DeviceImage)->get_ur_program_ref());
2326 
2327  std::vector<ur_device_handle_t> URDevices;
2328  URDevices.reserve(Devs.size());
2329  for (const device &Dev : Devs)
2330  URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
2331 
2332  std::string LinkOptionsStr;
2333  applyLinkOptionsFromEnvironment(LinkOptionsStr);
2334  if (LinkOptionsStr.empty()) {
2335  const std::shared_ptr<device_image_impl> &InputImpl =
2336  getSyclObjImpl(DeviceImage);
2337  appendLinkOptionsFromImage(LinkOptionsStr,
2338  *(InputImpl->get_bin_image_ref()));
2339  }
2340  // Should always come last!
2342  const context &Context = getSyclObjImpl(DeviceImage)->get_context();
2343  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2344  const PluginPtr &Plugin = ContextImpl->getPlugin();
2345 
2346  ur_program_handle_t LinkedProg = nullptr;
2347  auto doLink = [&] {
2348  auto Res = Plugin->call_nocheck(
2349  urProgramLinkExp, ContextImpl->getHandleRef(), URDevices.size(),
2350  URDevices.data(), URPrograms.size(), URPrograms.data(),
2351  LinkOptionsStr.c_str(), &LinkedProg);
2352  if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
2353  Res = Plugin->call_nocheck(urProgramLink, ContextImpl->getHandleRef(),
2354  URPrograms.size(), URPrograms.data(),
2355  LinkOptionsStr.c_str(), &LinkedProg);
2356  }
2357  return Res;
2358  };
2359  ur_result_t Error = doLink();
2360  if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES ||
2361  Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
2362  ContextImpl->getKernelProgramCache().reset();
2363  Error = doLink();
2364  }
2365 
2366  if (Error != UR_RESULT_SUCCESS) {
2367  if (LinkedProg) {
2368  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2369  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
2370  }
2371  throw set_ur_error(exception(make_error_code(errc::build), "link() failed"),
2372  Error);
2373  }
2374 
2375  std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
2376  std::vector<unsigned char> NewSpecConstBlob;
2377  device_image_impl::SpecConstMapT NewSpecConstMap;
2378 
2379  std::shared_ptr<device_image_impl> DeviceImageImpl =
2380  getSyclObjImpl(DeviceImage);
2381 
2382  // Duplicates are not expected here, otherwise urProgramLink should fail
2383  KernelIDs->insert(KernelIDs->end(),
2384  DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2385  DeviceImageImpl->get_kernel_ids_ptr()->end());
2386 
2387  // To be able to answer queries about specialziation constants, the new
2388  // device image should have the specialization constants from all the linked
2389  // images.
2390  {
2391  const std::lock_guard<std::mutex> SpecConstLock(
2392  DeviceImageImpl->get_spec_const_data_lock());
2393 
2394  // Copy all map entries to the new map. Since the blob will be copied to
2395  // the end of the new blob we need to move the blob offset of each entry.
2396  for (const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2397  std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2398  NewSpecConstMap[SpecConstIt.first];
2399  assert(NewDescEntries.empty() &&
2400  "Specialization constant already exists in the map.");
2401  NewDescEntries.reserve(SpecConstIt.second.size());
2402  for (const device_image_impl::SpecConstDescT &SpecConstDesc :
2403  SpecConstIt.second) {
2404  device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc;
2405  NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size();
2406  NewDescEntries.push_back(std::move(NewSpecConstDesc));
2407  }
2408  }
2409 
2410  // Copy the blob from the device image into the new blob. This moves the
2411  // offsets of the following blobs.
2412  NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2413  DeviceImageImpl->get_spec_const_blob_ref().begin(),
2414  DeviceImageImpl->get_spec_const_blob_ref().end());
2415  }
2416 
2417  // device_image_impl expects kernel ids to be sorted for fast search
2418  std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});
2419 
2420  auto BinImg = getSyclObjImpl(DeviceImage)->get_bin_image_ref();
2421  DeviceImageImplPtr ExecutableImpl =
2422  std::make_shared<detail::device_image_impl>(
2423  BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs),
2424  LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2425 
2426  // TODO: Make multiple sets of device images organized by devices they are
2427  // compiled for.
2428  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2429 }
2430 
2431 // The function duplicates most of the code from existing getBuiltPIProgram.
2432 // The differences are:
2433 // Different API - uses different objects to extract required info
2434 // Supports caching of a program built for multiple devices
2436  const std::vector<device> &Devs,
2437  const property_list &PropList) {
2438  (void)PropList;
2439 
2440  const std::shared_ptr<device_image_impl> &InputImpl =
2441  getSyclObjImpl(DeviceImage);
2442 
2443  const context Context = InputImpl->get_context();
2444 
2445  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2446 
2447  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
2448 
2449  std::string CompileOpts;
2450  std::string LinkOpts;
2451  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
2452 
2453  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
2454  const RTDeviceBinaryImage &Img = *ImgPtr;
2455 
2456  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2457 
2458  // TODO: Unify this code with getBuiltPIProgram
2459  auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2460  &InputImpl, SpecConsts] {
2461  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
2462  const PluginPtr &Plugin = ContextImpl->getPlugin();
2463  applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Plugin);
2464  // Should always come last!
2467  // TODO: Add support for creating non-SPIRV programs from multiple devices.
2468  if (InputImpl->get_bin_image_ref()->getFormat() !=
2470  Devs.size() > 1)
2471  // FIXME: It was probably intended to be thrown, but a unittest starts
2472  // failing if we do so, investigate independently of switching to SYCL
2473  // 2020 `exception`.
2475  "Creating a program from AOT binary for multiple device "
2476  "is not supported");
2477 
2478  // Device is not used when creating program from SPIRV, so passing only one
2479  // device is OK.
2480  auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram(
2481  Img, {&Img}, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2482 
2483  if (!DeviceCodeWasInCache &&
2484  InputImpl->get_bin_image_ref()->supportsSpecConstants())
2485  setSpecializationConstants(InputImpl, NativePrg, Plugin);
2486 
2487  ProgramPtr ProgramManaged(NativePrg, urProgramRelease);
2488 
2489  // Link a fallback implementation of device libraries if they are not
2490  // supported by a device compiler.
2491  // Pre-compiled programs are supposed to be already linked.
2492  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
2493  // no fallback device library will be linked.
2494  uint32_t DeviceLibReqMask = 0;
2497  DeviceLibReqMask = getDeviceLibReqMask(Img);
2498 
2499  // TODO: Add support for dynamic linking with kernel bundles
2500  std::vector<ur_program_handle_t> ExtraProgramsToLink;
2501  ProgramPtr BuiltProgram =
2502  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2503  getSyclObjImpl(Devs[0]).get()->getHandleRef(), DeviceLibReqMask,
2504  ExtraProgramsToLink);
2505 
2506  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
2507 
2508  {
2509  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2510  NativePrograms.insert({BuiltProgram.get(), &Img});
2511  }
2512 
2513  ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2514 
2515  // Save program to persistent cache if it is not there
2516  if (!DeviceCodeWasInCache)
2517  PersistentDeviceCodeCache::putItemToDisc(Devs[0], {&Img}, SpecConsts,
2518  CompileOpts + LinkOpts,
2519  BuiltProgram.get());
2520 
2521  return BuiltProgram.release();
2522  };
2523 
2525  auto ResProgram = BuildF();
2526  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2527  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2528  InputImpl->get_kernel_ids_ptr(), ResProgram,
2529  InputImpl->get_spec_const_data_ref(),
2530  InputImpl->get_spec_const_blob_ref());
2531 
2532  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2533  }
2534 
2535  uint32_t ImgId = Img.getImageID();
2536  ur_device_handle_t UrDevice = getSyclObjImpl(Devs[0]).get()->getHandleRef();
2537  auto CacheKey =
2538  std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice);
2539 
2540  // CacheKey is captured by reference so when we overwrite it later we can
2541  // reuse this function.
2542  auto GetCachedBuildF = [&Cache, &CacheKey]() {
2543  return Cache.getOrInsertProgram(CacheKey);
2544  };
2545 
2546  auto BuildResult = Cache.getOrBuild<errc::build>(GetCachedBuildF, BuildF);
2547  // getOrBuild is not supposed to return nullptr
2548  assert(BuildResult != nullptr && "Invalid build result");
2549 
2550  ur_program_handle_t ResProgram = BuildResult->Val;
2551 
2552  // Cache supports key with once device only, but here we have multiple
2553  // devices a program is built for, so add the program to the cache for all
2554  // other devices.
2555  const PluginPtr &Plugin = ContextImpl->getPlugin();
2556  auto CacheOtherDevices = [ResProgram, &Plugin]() {
2557  Plugin->call(urProgramRetain, ResProgram);
2558  return ResProgram;
2559  };
2560 
2561  // The program for device "0" is already added to the cache during the first
2562  // call to getOrBuild, so starting with "1"
2563  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2564  const ur_device_handle_t UrDeviceAdd =
2565  getSyclObjImpl(Devs[Idx]).get()->getHandleRef();
2566 
2567  // Change device in the cache key to reduce copying of spec const data.
2568  CacheKey.second = UrDeviceAdd;
2569  Cache.getOrBuild<errc::build>(GetCachedBuildF, CacheOtherDevices);
2570  // getOrBuild is not supposed to return nullptr
2571  assert(BuildResult != nullptr && "Invalid build result");
2572  }
2573 
2574  // devive_image_impl shares ownership of PIProgram with, at least, program
2575  // cache. The ref counter will be descremented in the destructor of
2576  // device_image_impl
2577  Plugin->call(urProgramRetain, ResProgram);
2578 
2579  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
2580  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2581  InputImpl->get_kernel_ids_ptr(), ResProgram,
2582  InputImpl->get_spec_const_data_ref(),
2583  InputImpl->get_spec_const_blob_ref());
2584 
2585  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2586 }
2587 
2588 // When caching is enabled, the returned UrKernel will already have
2589 // its ref count incremented.
2590 std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *>
2591 ProgramManager::getOrCreateKernel(const context &Context,
2592  const std::string &KernelName,
2593  const property_list &PropList,
2594  ur_program_handle_t Program) {
2595 
2596  (void)PropList;
2597 
2598  const ContextImplPtr Ctx = getSyclObjImpl(Context);
2599 
2600  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
2601 
2602  auto BuildF = [this, &Program, &KernelName, &Ctx] {
2603  ur_kernel_handle_t Kernel = nullptr;
2604 
2605  const PluginPtr &Plugin = Ctx->getPlugin();
2606  Plugin->call(urKernelCreate, Program, KernelName.c_str(), &Kernel);
2607 
2608  // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it.
2609  if (Ctx->getPlatformImpl()->supports_usm()) {
2610  bool EnableAccess = true;
2611  Plugin->call(urKernelSetExecInfo, Kernel,
2612  UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t),
2613  nullptr, &EnableAccess);
2614  }
2615 
2616  // Ignore possible m_UseSpvFile for now.
2617  // TODO consider making m_UseSpvFile interact with kernel bundles as well.
2618  const KernelArgMask *KernelArgMask =
2619  getEliminatedKernelArgMask(Program, KernelName);
2620 
2621  return std::make_pair(Kernel, KernelArgMask);
2622  };
2623 
2624  auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2625  return Cache.getOrInsertKernel(Program, KernelName);
2626  };
2627 
2629  // The built kernel cannot be shared between multiple
2630  // threads when caching is disabled, so we can return
2631  // nullptr for the mutex.
2632  auto [Kernel, ArgMask] = BuildF();
2633  return make_tuple(Kernel, nullptr, ArgMask);
2634  }
2635 
2636  auto BuildResult = Cache.getOrBuild<errc::invalid>(GetCachedBuildF, BuildF);
2637  // getOrBuild is not supposed to return nullptr
2638  assert(BuildResult != nullptr && "Invalid build result");
2639  // If caching is enabled, one copy of the kernel handle will be
2640  // stored in the cache, and one handle is returned to the
2641  // caller. In that case, we need to increase the ref count of the
2642  // kernel.
2643  Ctx->getPlugin()->call(urKernelRetain, BuildResult->Val.first);
2644  return std::make_tuple(BuildResult->Val.first,
2645  &(BuildResult->MBuildResultMutex),
2646  BuildResult->Val.second);
2647 }
2648 
2649 ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel(
2650  const std::string &KernelName,
2651  const std::vector<unsigned char> &SpecializationConsts) {
2652  if constexpr (DbgProgMgr > 0)
2653  std::cerr << ">>> ProgramManager::getCachedMaterializedKernel\n"
2654  << "KernelName: " << KernelName << "\n";
2655 
2656  {
2657  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2658  if (auto KnownMaterializations = m_MaterializedKernels.find(KernelName);
2659  KnownMaterializations != m_MaterializedKernels.end()) {
2660  if constexpr (DbgProgMgr > 0)
2661  std::cerr << ">>> There are:" << KnownMaterializations->second.size()
2662  << " materialized kernels.\n";
2663  if (auto Kernel =
2664  KnownMaterializations->second.find(SpecializationConsts);
2665  Kernel != KnownMaterializations->second.end()) {
2666  if constexpr (DbgProgMgr > 0)
2667  std::cerr << ">>> Kernel in the chache\n";
2668  return Kernel->second;
2669  }
2670  }
2671  }
2672 
2673  if constexpr (DbgProgMgr > 0)
2674  std::cerr << ">>> Kernel not in the chache\n";
2675 
2676  return nullptr;
2677 }
2678 
2679 ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel(
2680  const RTDeviceBinaryImage &Img, const context &Context,
2681  const device &Device, const std::string &KernelName,
2682  const std::vector<unsigned char> &SpecializationConsts) {
2683  // Check if we already have the kernel in the cache.
2684  if constexpr (DbgProgMgr > 0)
2685  std::cerr << ">>> ProgramManager::getOrCreateMaterializedKernel\n"
2686  << "KernelName: " << KernelName << "\n";
2687 
2688  if (auto CachedKernel =
2689  getCachedMaterializedKernel(KernelName, SpecializationConsts))
2690  return CachedKernel;
2691 
2692  if constexpr (DbgProgMgr > 0)
2693  std::cerr << ">>> Adding the kernel to the cache.\n";
2694  auto Program = createURProgram(Img, Context, Device);
2695  auto DeviceImpl = detail::getSyclObjImpl(Device);
2696  auto &Plugin = DeviceImpl->getPlugin();
2697  ProgramPtr ProgramManaged(Program, urProgramRelease);
2698 
2699  std::string CompileOpts;
2700  std::string LinkOpts;
2701  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
2702  // No linking of extra programs reqruired.
2703  std::vector<ur_program_handle_t> ExtraProgramsToLink;
2704  auto BuildProgram =
2705  build(std::move(ProgramManaged), detail::getSyclObjImpl(Context),
2706  CompileOpts, LinkOpts, DeviceImpl->getHandleRef(),
2707  /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0,
2708  ExtraProgramsToLink);
2709  ur_kernel_handle_t UrKernel{nullptr};
2710  Plugin->call<errc::kernel_not_supported>(urKernelCreate,
2711  BuildProgram.get(), KernelName.c_str(), &UrKernel);
2712  {
2713  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2714  m_MaterializedKernels[KernelName][SpecializationConsts] = UrKernel;
2715  }
2716 
2717  return UrKernel;
2718 }
2719 
2721  const RTDeviceBinaryImage &Img) {
2722  return !checkDevSupportDeviceRequirements(Dev, Img).has_value();
2723 }
2724 
2725 static std::string getAspectNameStr(sycl::aspect AspectNum) {
2726 #define __SYCL_ASPECT(ASPECT, ID) \
2727  case aspect::ASPECT: \
2728  return #ASPECT;
2729 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2730 // We don't need "case aspect::usm_allocator" here because it will duplicate
2731 // "case aspect::usm_system_allocations", therefore leave this macro empty
2732 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2733  switch (AspectNum) {
2734 #include <sycl/info/aspects.def>
2735 #include <sycl/info/aspects_deprecated.def>
2736  }
2738  "Unknown aspect " +
2739  std::to_string(static_cast<unsigned>(AspectNum)));
2740 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2741 #undef __SYCL_ASPECT_DEPRECATED
2742 #undef __SYCL_ASPECT
2743 }
2744 
2745 // Check if the multiplication over unsigned integers overflows
2746 template <typename T>
2747 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2749  if (y == 0)
2750  return 0;
2751  if (x > std::numeric_limits<T>::max() / y)
2752  return {};
2753  else
2754  return x * y;
2755 }
2756 
2757 namespace matrix_ext = ext::oneapi::experimental::matrix;
2758 
2759 // Matrix type string to matrix_type enum value conversion
2760 // Note: matrix type strings are defined in template specialization for
2761 // convertTypeToMatrixTypeString above
2762 std::optional<matrix_ext::matrix_type>
2764  const std::string &MatrixTypeString) {
2765  assert(!MatrixTypeString.empty() &&
2766  "MatrixTypeString type string can't be empty. Check if required "
2767  "template specialization for convertTypeToMatrixTypeString exists.");
2768  std::string_view MatrixTypeStringView = MatrixTypeString;
2769  std::string Prefix("matrix_type::");
2770  assert((MatrixTypeStringView.substr(0, Prefix.size()) == Prefix) &&
2771  "MatrixTypeString has incorrect prefix, should be \"matrix_type::\".");
2772  MatrixTypeStringView.remove_prefix(Prefix.size());
2773  if ("bf16" == MatrixTypeStringView)
2774  return matrix_ext::matrix_type::bf16;
2775  else if ("fp16" == MatrixTypeStringView)
2776  return matrix_ext::matrix_type::fp16;
2777  else if ("tf32" == MatrixTypeStringView)
2778  return matrix_ext::matrix_type::tf32;
2779  else if ("fp32" == MatrixTypeStringView)
2780  return matrix_ext::matrix_type::fp32;
2781  else if ("fp64" == MatrixTypeStringView)
2782  return matrix_ext::matrix_type::fp64;
2783  else if ("sint8" == MatrixTypeStringView)
2784  return matrix_ext::matrix_type::sint8;
2785  else if ("sint16" == MatrixTypeStringView)
2786  return matrix_ext::matrix_type::sint16;
2787  else if ("sint32" == MatrixTypeStringView)
2788  return matrix_ext::matrix_type::sint32;
2789  else if ("sint64" == MatrixTypeStringView)
2790  return matrix_ext::matrix_type::sint64;
2791  else if ("uint8" == MatrixTypeStringView)
2792  return matrix_ext::matrix_type::uint8;
2793  else if ("uint16" == MatrixTypeStringView)
2794  return matrix_ext::matrix_type::uint16;
2795  else if ("uint32" == MatrixTypeStringView)
2796  return matrix_ext::matrix_type::uint32;
2797  else if ("uint64" == MatrixTypeStringView)
2798  return matrix_ext::matrix_type::uint64;
2799  return std::nullopt;
2800 }
2801 
2802 bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser,
2803  size_t RowsUser, size_t ColsUser,
2804  matrix_ext::matrix_type MatrixTypeRuntime,
2805  size_t MaxRowsRuntime, size_t MaxColsRuntime,
2806  size_t RowsRuntime, size_t ColsRuntime) {
2807  std::optional<matrix_ext::matrix_type> MatrixTypeUserOpt =
2808  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeStrUser);
2809  if (!MatrixTypeUserOpt)
2810  return false;
2811  bool IsMatrixTypeSupported = (MatrixTypeUserOpt.value() == MatrixTypeRuntime);
2812  bool IsRowsSupported = ((RowsRuntime != 0) ? (RowsUser == RowsRuntime)
2813  : (RowsUser <= MaxRowsRuntime));
2814  bool IsColsSupported = ((ColsRuntime != 0) ? (ColsUser == ColsRuntime)
2815  : (ColsUser <= MaxColsRuntime));
2816  return IsMatrixTypeSupported && IsRowsSupported && IsColsSupported;
2817 }
2818 
2819 std::optional<sycl::exception> checkDevSupportJointMatrix(
2820  const std::string &JointMatrixProStr,
2821  const std::vector<ext::oneapi::experimental::matrix::combination>
2822  &SupportedMatrixCombinations) {
2823  std::istringstream JointMatrixStrStream(JointMatrixProStr);
2824  std::string SingleJointMatrix;
2825 
2826  // start to parse the value which is generated by
2827  // SYCLPropagateJointMatrixUsage pass
2828  while (std::getline(JointMatrixStrStream, SingleJointMatrix, ';')) {
2829  std::istringstream SingleJointMatrixStrStream(SingleJointMatrix);
2830  std::vector<std::string> JointMatrixVec;
2831  std::string Item;
2832 
2833  while (std::getline(SingleJointMatrixStrStream, Item, ',')) {
2834  JointMatrixVec.push_back(Item);
2835  }
2836 
2837  assert(JointMatrixVec.size() == 4 &&
2838  "Property set is corrupted, it must have 4 elements.");
2839 
2840  const std::string &MatrixTypeUser = JointMatrixVec[0];
2841  const std::string &UseStrUser = JointMatrixVec[1];
2842  size_t RowsUser, ColsUser = 0;
2843  try {
2844  RowsUser = std::stoi(JointMatrixVec[2]);
2845  ColsUser = std::stoi(JointMatrixVec[3]);
2846  } catch (std::logic_error &) {
2847  // ignore exceptions, one way or another a user will see sycl::exception
2848  // with the message about incorrect rows or cols, because they are
2849  // initialized with 0 above
2850  }
2851 
2852  bool IsMatrixCompatible = false;
2853 
2854  for (const auto &Combination : SupportedMatrixCombinations) {
2855  std::optional<ext::oneapi::experimental::matrix::use> Use =
2856  detail::convertMatrixUseStringToEnum(UseStrUser.c_str());
2857  assert(Use && "Property set has empty matrix::use value.");
2858  switch (Use.value()) {
2859  case matrix_ext::use::a:
2860  IsMatrixCompatible |= isMatrixSupportedByHW(
2861  MatrixTypeUser, RowsUser, ColsUser, Combination.atype,
2862  Combination.max_msize, Combination.max_ksize, Combination.msize,
2863  Combination.ksize);
2864  break;
2865  case matrix_ext::use::b:
2866  IsMatrixCompatible |= isMatrixSupportedByHW(
2867  MatrixTypeUser, RowsUser, ColsUser, Combination.btype,
2868  Combination.max_ksize, Combination.max_nsize, Combination.ksize,
2869  Combination.nsize);
2870  break;
2871  case matrix_ext::use::accumulator: {
2872  IsMatrixCompatible |= isMatrixSupportedByHW(
2873  MatrixTypeUser, RowsUser, ColsUser, Combination.ctype,
2874  Combination.max_msize, Combination.max_nsize, Combination.msize,
2875  Combination.nsize);
2876  IsMatrixCompatible |= isMatrixSupportedByHW(
2877  MatrixTypeUser, RowsUser, ColsUser, Combination.dtype,
2878  Combination.max_msize, Combination.max_nsize, Combination.msize,
2879  Combination.nsize);
2880  break;
2881  }
2882  }
2883 
2884  // early exit if we have a match
2885  if (IsMatrixCompatible)
2886  break;
2887  }
2888 
2889  if (!IsMatrixCompatible)
2891  "joint_matrix with parameters " + MatrixTypeUser +
2892  ", " + UseStrUser +
2893  ", Rows=" + std::to_string(RowsUser) +
2894  ", Cols=" + std::to_string(ColsUser) +
2895  " is not supported on this device");
2896  }
2897  return std::nullopt;
2898 }
2899 
2900 std::optional<sycl::exception> checkDevSupportJointMatrixMad(
2901  const std::string &JointMatrixProStr,
2902  const std::vector<ext::oneapi::experimental::matrix::combination>
2903  &SupportedMatrixCombinations) {
2904  std::istringstream JointMatrixMadStrStream(JointMatrixProStr);
2905  std::string SingleJointMatrixMad;
2906 
2907  // start to parse the value which is generated by
2908  // SYCLPropagateJointMatrixUsage pass
2909  while (std::getline(JointMatrixMadStrStream, SingleJointMatrixMad, ';')) {
2910  std::istringstream SingleJointMatrixMadStrStream(SingleJointMatrixMad);
2911  std::vector<std::string> JointMatrixMadVec;
2912  std::string Item;
2913 
2914  while (std::getline(SingleJointMatrixMadStrStream, Item, ',')) {
2915  JointMatrixMadVec.push_back(Item);
2916  }
2917 
2918  assert(JointMatrixMadVec.size() == 7 &&
2919  "Property set is corrupted, it must have 7 elements.");
2920 
2921  const std::string &MatrixTypeAStrUser = JointMatrixMadVec[0];
2922  const std::string &MatrixTypeBStrUser = JointMatrixMadVec[1];
2923  const std::string &MatrixTypeCStrUser = JointMatrixMadVec[2];
2924  const std::string &MatrixTypeDStrUser = JointMatrixMadVec[3];
2925  size_t MSizeUser, KSizeUser, NSizeUser = 0;
2926  try {
2927  MSizeUser = std::stoi(JointMatrixMadVec[4]);
2928  KSizeUser = std::stoi(JointMatrixMadVec[5]);
2929  NSizeUser = std::stoi(JointMatrixMadVec[6]);
2930  } catch (std::logic_error &) {
2931  // ignore exceptions, one way or another a user will see sycl::exception
2932  // with the message about incorrect size(s), because they are
2933  // initialized with 0 above
2934  }
2935 
2936  std::optional<matrix_ext::matrix_type> MatrixTypeAUserOpt =
2937  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeAStrUser);
2938  std::optional<matrix_ext::matrix_type> MatrixTypeBUserOpt =
2939  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeBStrUser);
2940  std::optional<matrix_ext::matrix_type> MatrixTypeCUserOpt =
2941  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeCStrUser);
2942  std::optional<matrix_ext::matrix_type> MatrixTypeDUserOpt =
2943  convertMatrixTypeStringMatrixTypeEnumValue(MatrixTypeDStrUser);
2944 
2945  bool IsMatrixMadCompatible = false;
2946 
2947  for (const auto &Combination : SupportedMatrixCombinations) {
2948  if (!MatrixTypeAUserOpt || !MatrixTypeBUserOpt || !MatrixTypeCUserOpt ||
2949  !MatrixTypeDUserOpt)
2950  continue;
2951 
2952  bool IsMatrixTypeACompatible =
2953  (MatrixTypeAUserOpt.value() == Combination.atype);
2954  bool IsMatrixTypeBCompatible =
2955  (MatrixTypeBUserOpt.value() == Combination.btype);
2956  bool IsMatrixTypeCCompatible =
2957  (MatrixTypeCUserOpt.value() == Combination.ctype);
2958  bool IsMatrixTypeDCompatible =
2959  (MatrixTypeDUserOpt.value() == Combination.dtype);
2960  bool IsMSizeCompatible =
2961  ((Combination.msize != 0) ? (MSizeUser == Combination.msize)
2962  : (MSizeUser <= Combination.max_msize));
2963  bool IsKSizeCompatible =
2964  ((Combination.ksize != 0) ? (KSizeUser == Combination.ksize)
2965  : (KSizeUser <= Combination.max_ksize));
2966  bool IsNSizeCompatible =
2967  ((Combination.nsize != 0) ? (NSizeUser == Combination.nsize)
2968  : (NSizeUser <= Combination.max_nsize));
2969 
2970  IsMatrixMadCompatible =
2971  IsMatrixTypeACompatible && IsMatrixTypeBCompatible &&
2972  IsMatrixTypeCCompatible && IsMatrixTypeDCompatible &&
2973  IsMSizeCompatible && IsKSizeCompatible && IsNSizeCompatible;
2974 
2975  // early exit if we have a match
2976  if (IsMatrixMadCompatible)
2977  break;
2978  }
2979 
2980  if (!IsMatrixMadCompatible)
2981  return sycl::exception(
2983  "joint_matrix_mad function with parameters atype=" +
2984  MatrixTypeAStrUser + ", btype=" + MatrixTypeBStrUser +
2985  ", ctype=" + MatrixTypeCStrUser + ", dtype=" +
2986  MatrixTypeDStrUser + ", M=" + std::to_string(MSizeUser) + ", K=" +
2987  std::to_string(KSizeUser) + ", N=" + std::to_string(NSizeUser) +
2988  " is not supported on this "
2989  "device");
2990  }
2991  return std::nullopt;
2992 }
2993 
2994 std::optional<sycl::exception>
2996  const RTDeviceBinaryImage &Img,
2997  const NDRDescT &NDRDesc) {
2998  auto getPropIt = [&Img](const std::string &PropName) {
2999  auto &PropRange = Img.getDeviceRequirements();
3001  PropRange.begin(), PropRange.end(),
3003  return (*Prop)->Name == PropName;
3004  });
3005  return (PropIt == PropRange.end())
3006  ? std::nullopt
3007  : std::optional<
3009  };
3010 
3011  auto AspectsPropIt = getPropIt("aspects");
3012  auto JointMatrixPropIt = getPropIt("joint_matrix");
3013  auto JointMatrixMadPropIt = getPropIt("joint_matrix_mad");
3014  auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size");
3015  auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t");
3016  auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size");
3017  auto WorkGroupNumDim = getPropIt("work_group_num_dim");
3018 
3019  // Checking if device supports defined aspects
3020  if (AspectsPropIt) {
3021  ByteArray Aspects =
3022  DeviceBinaryProperty(*(AspectsPropIt.value())).asByteArray();
3023  // Drop 8 bytes describing the size of the byte array.
3024  Aspects.dropBytes(8);
3025  while (!Aspects.empty()) {
3026  aspect Aspect = Aspects.consume<aspect>();
3027  if (!Dev.has(Aspect))
3029  "Required aspect " + getAspectNameStr(Aspect) +
3030  " is not supported on the device");
3031  }
3032  }
3033 
3034  if (JointMatrixPropIt) {
3035  std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
3036  Dev.get_info<
3037  ext::oneapi::experimental::info::device::matrix_combinations>();
3038 
3039  if (Combinations.empty())
3041  "no matrix hardware on the target device, "
3042  "joint_matrix is not supported");
3043 
3044  ByteArray JointMatrixByteArray =
3045  DeviceBinaryProperty(*(JointMatrixPropIt.value())).asByteArray();
3046  // Drop 8 bytes describing the size of the byte array.
3047  JointMatrixByteArray.dropBytes(8);
3048  std::string JointMatrixByteArrayToStr;
3049  while (!JointMatrixByteArray.empty()) {
3050  JointMatrixByteArrayToStr += JointMatrixByteArray.consume<char>();
3051  }
3052  std::optional<sycl::exception> Result =
3053  checkDevSupportJointMatrix(JointMatrixByteArrayToStr, Combinations);
3054  if (Result)
3055  return Result.value();
3056  }
3057 
3058  if (JointMatrixMadPropIt) {
3059  std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
3060  Dev.get_info<
3061  ext::oneapi::experimental::info::device::matrix_combinations>();
3062 
3063  if (Combinations.empty())
3065  "no matrix hardware on the target device, "
3066  "joint_matrix_mad is not supported");
3067 
3068  ByteArray JointMatrixMadByteArray =
3069  DeviceBinaryProperty(*(JointMatrixMadPropIt.value())).asByteArray();
3070  // Drop 8 bytes describing the size of the byte array.
3071  JointMatrixMadByteArray.dropBytes(8);
3072  std::string JointMatrixMadByteArrayToStr;
3073  while (!JointMatrixMadByteArray.empty()) {
3074  JointMatrixMadByteArrayToStr += JointMatrixMadByteArray.consume<char>();
3075  }
3076  std::optional<sycl::exception> Result = checkDevSupportJointMatrixMad(
3077  JointMatrixMadByteArrayToStr, Combinations);
3078  if (Result)
3079  return Result.value();
3080  }
3081 
3082  // Checking if device supports defined required work group size
3083  if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
3085  // stores its values as uint32_t, but this needed to be expanded to
3086  // uint64_t. However, this change did not happen in ABI-breaking
3087  // window, so we attach the required work-group size as the
3088  // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
3089  // window, we can remove the logic for the 32 bit property.
3090  bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
3091  auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
3092 
3093  ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray();
3094  // Drop 8 bytes describing the size of the byte array.
3095  ReqdWGSize.dropBytes(8);
3096  uint64_t ReqdWGSizeAllDimsTotal = 1;
3097  std::vector<uint64_t> ReqdWGSizeVec;
3098  int Dims = 0;
3099  while (!ReqdWGSize.empty()) {
3100  uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume<uint64_t>()
3101  : ReqdWGSize.consume<uint32_t>();
3102  if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal,
3103  SingleDimSize))
3104  ReqdWGSizeAllDimsTotal = *res;
3105  else
3106  return sycl::exception(
3107  sycl::errc::kernel_not_supported,
3108  "Required work-group size is not supported"
3109  " (total number of work-items requested can't fit into size_t)");
3110  ReqdWGSizeVec.push_back(SingleDimSize);
3111  Dims++;
3112  }
3113 
3114  size_t UserProvidedNumDims = 0;
3115  if (WorkGroupNumDim) {
3116  // We know the dimensions have been padded to 3, make sure that the pad
3117  // value is always set to 1 and record the number of dimensions specified
3118  // by the user.
3119  UserProvidedNumDims =
3120  DeviceBinaryProperty(*(WorkGroupNumDim.value())).asUint32();
3121 #ifndef NDEBUG
3122  for (unsigned i = UserProvidedNumDims; i < 3; ++i)
3123  assert(ReqdWGSizeVec[i] == 1 &&
3124  "Incorrect padding in required work-group size metadata.");
3125 #endif // NDEBUG
3126  } else {
3127  UserProvidedNumDims = Dims;
3128  }
3129 
3130  if (NDRDesc.Dims != 0 && NDRDesc.Dims != UserProvidedNumDims)
3131  return sycl::exception(
3133  "The local size dimension of submitted nd_range doesn't match the "
3134  "required work-group size dimension");
3135 
3136  // The SingleDimSize was computed in an uint64_t; size_t does not
3137  // necessarily have to be the same uint64_t (but should fit in an
3138  // uint64_t).
3139  if (ReqdWGSizeAllDimsTotal >
3140  Dev.get_info<info::device::max_work_group_size>())
3141  return sycl::exception(sycl::errc::kernel_not_supported,
3142  "Required work-group size " +
3143  std::to_string(ReqdWGSizeAllDimsTotal) +
3144  " is not supported on the device");
3145  // Creating std::variant to call max_work_item_sizes one time to avoid
3146  // performance drop
3147  std::variant<id<1>, id<2>, id<3>> MaxWorkItemSizesVariant;
3148  if (Dims == 1)
3149  MaxWorkItemSizesVariant =
3151  else if (Dims == 2)
3152  MaxWorkItemSizesVariant =
3154  else // (Dims == 3)
3155  MaxWorkItemSizesVariant =
3157  for (int i = 0; i < Dims; i++) {
3158  // Extracting value from std::variant to avoid dealing with type-safety
3159  // issues after that
3160  if (Dims == 1) {
3161  // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
3162  if (ReqdWGSizeVec[i] >
3163  std::get<id<1>>(MaxWorkItemSizesVariant)[Dims - i - 1])
3164  return sycl::exception(sycl::errc::kernel_not_supported,
3165  "Required work-group size " +
3166  std::to_string(ReqdWGSizeVec[i]) +
3167  " is not supported");
3168  } else if (Dims == 2) {
3169  if (ReqdWGSizeVec[i] >
3170  std::get<id<2>>(MaxWorkItemSizesVariant)[Dims - i - 1])
3171  return sycl::exception(sycl::errc::kernel_not_supported,
3172  "Required work-group size " +
3173  std::to_string(ReqdWGSizeVec[i]) +
3174  " is not supported");
3175  } else // (Dims == 3)
3176  if (ReqdWGSizeVec[i] >
3177  std::get<id<3>>(MaxWorkItemSizesVariant)[Dims - i - 1])
3178  return sycl::exception(sycl::errc::kernel_not_supported,
3179  "Required work-group size " +
3180  std::to_string(ReqdWGSizeVec[i]) +
3181  " is not supported");
3182  }
3183  }
3184 
3185  // Check if device supports required sub-group size.
3186  if (ReqdSubGroupSizePropIt) {
3187  auto ReqdSubGroupSize =
3188  DeviceBinaryProperty(*(ReqdSubGroupSizePropIt.value())).asUint32();
3189  auto SupportedSubGroupSizes = Dev.get_info<info::device::sub_group_sizes>();
3190  // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
3191  // as ESIMD images have a reqd-sub-group-size of 1, but currently
3192  // no backend currently includes 1 as a valid sub-group size.
3193  // This can be removed if backends add 1 as a valid sub-group size.
3194  if (!getUint32PropAsBool(Img, "isEsimdImage") &&
3195  std::none_of(SupportedSubGroupSizes.cbegin(),
3196  SupportedSubGroupSizes.cend(),
3197  [=](auto s) { return s == ReqdSubGroupSize; }))
3198  return sycl::exception(sycl::errc::kernel_not_supported,
3199  "Sub-group size " +
3200  std::to_string(ReqdSubGroupSize) +
3201  " is not supported on the device");
3202  }
3203 
3204  return {};
3205 }
3206 
3207 } // namespace detail
3208 } // namespace _V1
3209 } // namespace sycl
3210 
3212  sycl::detail::ProgramManager::getInstance().addImages(desc);
3213 }
3214 
3215 // Executed as a part of current module's (.exe, .dll) static initialization
3217  (void)desc;
3218  // TODO implement the function
3219 }
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:124
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:120
void dropBytes(std::size_t Bytes)
static GlobalHandler & instance()
bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey, ur_program_handle_t Program)
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(ur_program_handle_t Program, const std::string &KernelName)
std::pair< ur_kernel_handle_t, const KernelArgMask * > KernelArgMaskPairT
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
static constexpr const char * DirSep
Definition: os_util.hpp:55
static std::string getCurrentDSODir()
Returns an absolute path to a directory where the object was found.
static std::vector< std::vector< char > > getItemFromDisc(const device &Device, const std::vector< const RTDeviceBinaryImage * > &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString)
static void putItemToDisc(const device &Device, const std::vector< const RTDeviceBinaryImage * > &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, const ur_program_handle_t &NativePrg)
ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
std::tuple< ur_kernel_handle_t, std::mutex *, const KernelArgMask *, ur_program_handle_t > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
void addImages(sycl_device_binaries DeviceImages)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
const KernelArgMask * getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context)
std::pair< ur_program_handle_t, bool > getOrCreateURProgram(const RTDeviceBinaryImage &Img, const std::vector< const RTDeviceBinaryImage * > &AllImages, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a UR program using either a cached device code binary if present in the persistent cache or f...
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
bool kernelUsesAssert(const std::string &KernelName) const
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context)
bool hasCompatibleImage(const device &Dev)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
virtual void dump(std::ostream &Out) const
const sycl_device_binary_struct & getRawData() const
ur::DeviceBinaryType getFormat() const
Returns the format of the binary image.
const std::vector< ur_program_metadata_t > & getProgramMetadataUR() const
sycl_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
const PropertyRange & getDeviceRequirements() const
const PropertyRange & getDeviceLibReqMask() const
static const char * get()
Definition: config.hpp:115
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:215
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect)
Indicates if the SYCL device has the given feature.
Definition: device.cpp:207
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: compiler.hpp:29
#define __SYCL_DEVICE_BINARY_TARGET_AMDGCN
Definition: compiler.hpp:32
#define __SYCL_DEVICE_BINARY_TARGET_NVPTX64
PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device.
Definition: compiler.hpp:31
sycl_device_binary_type
Types of device binary.
Definition: compiler.hpp:114
@ SYCL_DEVICE_BINARY_TYPE_SPIRV
Definition: compiler.hpp:117
@ SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: compiler.hpp:118
@ SYCL_DEVICE_BINARY_TYPE_NONE
Definition: compiler.hpp:115
@ SYCL_DEVICE_BINARY_TYPE_NATIVE
Definition: compiler.hpp:116
#define __SYCL_DEVICE_BINARY_TARGET_UNKNOWN
Target identification strings.
Definition: compiler.hpp:20
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
Definition: compiler.hpp:24
#define __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU
Definition: compiler.hpp:33
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: compiler.hpp:27
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: compiler.hpp:28
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV32
SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device.
Definition: compiler.hpp:22
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
sycl_device_binary_type getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: ur.cpp:335
ur_program_metadata_t mapDeviceBinaryPropertyToProgramMetadata(const sycl_device_binary_property &DeviceBinaryProperty)
Definition: ur.cpp:382
::sycl_device_binary_type DeviceBinaryType
Definition: ur_utils.hpp:72
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, bool JITCompilationIsRequired)
static constexpr int DbgProgMgr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, ur_program_handle_t &Prog)
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::optional< matrix_ext::matrix_type > convertMatrixTypeStringMatrixTypeEnumValue(const std::string &MatrixTypeString)
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
static bool isDeviceBinaryTypeSupported(const context &C, ur::DeviceBinaryType Format)
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
std::vector< bool > KernelArgMask
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, const char *PropName)
static const char * getUrDeviceTarget(const char *URDeviceTarget)
static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, ur_device_handle_t Device, bool UseNativeLib)
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
static constexpr uint32_t ITTSpecConstId
KernelArgMask createKernelArgMask(const ByteArray &Bytes)
static std::string getAspectNameStr(sycl::aspect AspectNum)
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static void appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage)
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
std::optional< sycl::exception > checkDevSupportJointMatrix(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
constexpr std::optional< ext::oneapi::experimental::matrix::use > convertMatrixUseStringToEnum(const char *UseString)
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const PluginPtr &)
RTDeviceBinaryImage * getBinImageFromMultiMap(const std::unordered_multimap< StorageKey, RTDeviceBinaryImage * > &ImagesSet, const StorageKey &Key, const context &Context, const device &Device)
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, ur_program_handle_t Prog, const PluginPtr &Plugin)
std::vector< std::string > split_string(std::string_view str, char delimeter)
std::shared_ptr< plugin > PluginPtr
Definition: ur.hpp:60
static ur_result_t doCompile(const PluginPtr &Plugin, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, const char *Opts)
std::shared_ptr< device_impl > DeviceImplPtr
std::optional< sycl::exception > checkDevSupportJointMatrixMad(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const PluginPtr &Plugin)
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser, size_t RowsUser, size_t ColsUser, matrix_ext::matrix_type MatrixTypeRuntime, size_t MaxRowsRuntime, size_t MaxColsRuntime, size_t RowsRuntime, size_t ColsRuntime)
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
static std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > multiply_with_overflow_check(T x, T y)
static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts)
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
static void appendCompileEnvironmentVariablesThatAppend(std::string &CompileOpts)
static void emitBuiltProgramInfo(const ur_program_handle_t &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
std::vector< unsigned char > SerializedObj
Definition: util.hpp:69
exception set_ur_error(exception &&e, int32_t ur_err)
Definition: exception.hpp:157
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
static std::vector< ur_program_handle_t > getDeviceLibPrograms(const ContextImplPtr Context, const ur_device_handle_t &Device, uint32_t DeviceLibReqMask)
static ur_program_handle_t createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< ur_program_metadata_t > Metadata)
static const char * getFormatStr(ur::DeviceBinaryType Format)
static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog, const PluginPtr &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: helpers.cpp:25
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
auto autodecltype(a) b
pointer get() const
Definition: multi_ptr.hpp:544
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:65
autodecltype(x) x
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: access.hpp:18
void __sycl_unregister_lib(sycl_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
void __sycl_register_lib(sycl_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
Definition: compiler.hpp:75
This struct is a record of all the device code that may be offloaded.
Definition: compiler.hpp:186
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: compiler.hpp:192
sycl_device_binary DeviceBinaries
Device binaries data.
Definition: compiler.hpp:194
This struct is a record of the device binary information.
Definition: compiler.hpp:132
sycl_offload_entry EntriesEnd
Definition: compiler.hpp:169
sycl_offload_entry EntriesBegin
the offload entry table
Definition: compiler.hpp:168
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: compiler.hpp:164
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: compiler.hpp:152
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: compiler.hpp:166
C++ utilities for Unified Runtime integration.