DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.cpp
Go to the documentation of this file.
1 //==------ program_manager.cpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <CL/sycl/context.hpp>
14 #include <CL/sycl/detail/util.hpp>
15 #include <CL/sycl/device.hpp>
16 #include <CL/sycl/exception.hpp>
17 #include <CL/sycl/stl.hpp>
18 #include <detail/config.hpp>
19 #include <detail/context_impl.hpp>
21 #include <detail/device_impl.hpp>
24 #include <detail/program_impl.hpp>
28 
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <cstring>
34 #include <fstream>
35 #include <memory>
36 #include <mutex>
37 #include <sstream>
38 #include <string>
39 
41 namespace sycl {
42 namespace detail {
43 
44 using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
45 
46 static constexpr int DbgProgMgr = 0;
47 
49 
50 static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV");
51 
55  const plugin &Plugin) {
57  constexpr char SpecValue = 1;
59  Prog, ITTSpecConstId, sizeof(char), &SpecValue);
60  }
61 }
62 
63 ProgramManager &ProgramManager::getInstance() {
64  return GlobalHandler::instance().getProgramManager();
65 }
66 
67 static RT::PiProgram
69  const unsigned char *Data, size_t DataLen,
70  const std::vector<pi_device_binary_property> Metadata) {
71  const detail::plugin &Plugin = Context->getPlugin();
72 #ifndef _NDEBUG
73  pi_uint32 NumDevices = 0;
74  Plugin.call<PiApiKind::piContextGetInfo>(Context->getHandleRef(),
76  sizeof(NumDevices), &NumDevices,
77  /*param_value_size_ret=*/nullptr);
78  assert(NumDevices > 0 &&
79  "Only a single device is supported for AOT compilation");
80 #endif
81 
82  RT::PiProgram Program;
83  const RT::PiDevice PiDevice = getSyclObjImpl(Device)->getHandleRef();
84  pi_int32 BinaryStatus = CL_SUCCESS;
86  Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data,
87  Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
88 
89  if (BinaryStatus != CL_SUCCESS) {
90  throw runtime_error("Creating program with binary failed.", BinaryStatus);
91  }
92 
93  return Program;
94 }
95 
97  const unsigned char *Data,
98  size_t DataLen) {
99  RT::PiProgram Program = nullptr;
100  const detail::plugin &Plugin = Context->getPlugin();
101  Plugin.call<PiApiKind::piProgramCreate>(Context->getHandleRef(), Data,
102  DataLen, &Program);
103  return Program;
104 }
105 
107 ProgramManager::getDeviceImage(OSModuleHandle M, const std::string &KernelName,
108  const context &Context, const device &Device,
109  bool JITCompilationIsRequired) {
110  if (DbgProgMgr > 0)
111  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \""
112  << KernelName << "\", " << getRawSyclObjImpl(Context) << ", "
113  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
114  << ")\n";
115 
116  KernelSetId KSId = getKernelSetId(M, KernelName);
117  return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
118 }
119 
120 template <typename ExceptionT, typename RetT>
123  // any thread which will find nullptr in cache will wait until the pointer
124  // is not null anymore
125  Cache.waitUntilBuilt(*BuildResult, [BuildResult]() {
126  int State = BuildResult->State.load();
127 
128  return State == BS_Done || State == BS_Failed;
129  });
130 
131  if (BuildResult->Error.isFilledIn()) {
132  const KernelProgramCache::BuildError &Error = BuildResult->Error;
133  throw ExceptionT(Error.Msg, Error.Code);
134  }
135 
136  RetT *Result = BuildResult->Ptr.load();
137 
138  return Result;
139 }
140 
160 template <typename RetT, typename ExceptionT, typename KeyT, typename AcquireFT,
161  typename GetCacheFT, typename BuildFT>
163 getOrBuild(KernelProgramCache &KPCache, KeyT &&CacheKey, AcquireFT &&Acquire,
164  GetCacheFT &&GetCache, BuildFT &&Build) {
165  bool InsertionTookPlace;
167 
168  {
169  auto LockedCache = Acquire(KPCache);
170  auto &Cache = GetCache(LockedCache);
171  auto Inserted =
172  Cache.emplace(std::piecewise_construct, std::forward_as_tuple(CacheKey),
173  std::forward_as_tuple(nullptr, BS_InProgress));
174 
175  InsertionTookPlace = Inserted.second;
176  BuildResult = &Inserted.first->second;
177  }
178 
179  // no insertion took place, thus some other thread has already inserted smth
180  // in the cache
181  if (!InsertionTookPlace) {
182  for (;;) {
183  RetT *Result = waitUntilBuilt<ExceptionT>(KPCache, BuildResult);
184 
185  if (Result)
186  return BuildResult;
187 
188  // Previous build is failed. There was no SYCL exception though.
189  // We might try to build once more.
190  int Expected = BS_Failed;
191  int Desired = BS_InProgress;
192 
193  if (BuildResult->State.compare_exchange_strong(Expected, Desired))
194  break; // this thread is the building thread now
195  }
196  }
197 
198  // only the building thread will run this
199  try {
200  RetT *Desired = Build();
201 
202 #ifndef NDEBUG
203  RetT *Expected = nullptr;
204 
205  if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
206  // We've got a funny story here
207  assert(false && "We've build an entity that is already have been built.");
208 #else
209  BuildResult->Ptr.store(Desired);
210 #endif
211 
212  {
213  // Even if shared variable is atomic, it must be modified under the mutex
214  // in order to correctly publish the modification to the waiting thread
215  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
216  BuildResult->State.store(BS_Done);
217  }
218 
219  KPCache.notifyAllBuild(*BuildResult);
220 
221  return BuildResult;
222  } catch (const exception &Ex) {
223  BuildResult->Error.Msg = Ex.what();
224  BuildResult->Error.Code = Ex.get_cl_code();
225 
226  {
227  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
228  BuildResult->State.store(BS_Failed);
229  }
230 
231  KPCache.notifyAllBuild(*BuildResult);
232 
233  std::rethrow_exception(std::current_exception());
234  } catch (...) {
235  {
236  std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
237  BuildResult->State.store(BS_Failed);
238  }
239 
240  KPCache.notifyAllBuild(*BuildResult);
241 
242  std::rethrow_exception(std::current_exception());
243  }
244 }
245 
246 // TODO replace this with a new PI API function
248  RT::PiDeviceBinaryType Format) {
249  // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported.
250  if (Format != PI_DEVICE_BINARY_TYPE_SPIRV)
251  return true;
252 
253  const backend ContextBackend =
254  detail::getSyclObjImpl(C)->getPlugin().getBackend();
255 
256  // The CUDA backend cannot use SPIR-V
257  if (ContextBackend == backend::ext_oneapi_cuda)
258  return false;
259 
260  std::vector<device> Devices = C.get_devices();
261 
262  // Program type is SPIR-V, so we need a device compiler to do JIT.
263  for (const device &D : Devices) {
264  if (!D.get_info<info::device::is_compiler_available>())
265  return false;
266  }
267 
268  // OpenCL 2.1 and greater require clCreateProgramWithIL
269  if (ContextBackend == backend::opencl) {
270  std::string ver = C.get_platform().get_info<info::platform::version>();
271  if (ver.find("OpenCL 1.0") == std::string::npos &&
272  ver.find("OpenCL 1.1") == std::string::npos &&
273  ver.find("OpenCL 1.2") == std::string::npos &&
274  ver.find("OpenCL 2.0") == std::string::npos)
275  return true;
276  }
277 
278  for (const device &D : Devices) {
279  // We need cl_khr_il_program extension to be present
280  // and we can call clCreateProgramWithILKHR using the extension
281  std::vector<std::string> Extensions =
282  D.get_info<info::device::extensions>();
283  if (Extensions.end() ==
284  std::find(Extensions.begin(), Extensions.end(), "cl_khr_il_program"))
285  return false;
286  }
287 
288  return true;
289 }
290 
291 static const char *getFormatStr(RT::PiDeviceBinaryType Format) {
292  switch (Format) {
294  return "none";
296  return "native";
298  return "SPIR-V";
300  return "LLVM IR";
301  }
302  assert(false && "Unknown device image format");
303  return "unknown";
304 }
305 
306 RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img,
307  const context &Context,
308  const device &Device) {
309  if (DbgProgMgr > 0)
310  std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", "
311  << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device)
312  << ")\n";
313  const pi_device_binary_struct &RawImg = Img.getRawData();
314 
315  // perform minimal sanity checks on the device image and the descriptor
316  if (RawImg.BinaryEnd < RawImg.BinaryStart) {
317  throw runtime_error("Malformed device program image descriptor",
318  PI_ERROR_INVALID_VALUE);
319  }
320  if (RawImg.BinaryEnd == RawImg.BinaryStart) {
321  throw runtime_error("Invalid device program image: size is zero",
322  PI_ERROR_INVALID_VALUE);
323  }
324  size_t ImgSize = Img.getSize();
325 
326  // TODO if the binary image is a part of the fat binary, the clang
327  // driver should have set proper format option to the
328  // clang-offload-wrapper. The fix depends on AOT compilation
329  // implementation, so will be implemented together with it.
330  // Img->Format can't be updated as it is inside of the in-memory
331  // OS module binary.
332  RT::PiDeviceBinaryType Format = Img.getFormat();
333 
334  if (Format == PI_DEVICE_BINARY_TYPE_NONE)
335  Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize);
336  // RT::PiDeviceBinaryType Format = Img->Format;
337  // assert(Format != PI_DEVICE_BINARY_TYPE_NONE && "Image format not set");
338 
339  if (!isDeviceBinaryTypeSupported(Context, Format))
340  throw feature_not_supported(
341  "SPIR-V online compilation is not supported in this context",
342  PI_ERROR_INVALID_OPERATION);
343 
344  // Get program metadata from properties
345  auto ProgMetadata = Img.getProgramMetadata();
346  std::vector<pi_device_binary_property> ProgMetadataVector{
347  ProgMetadata.begin(), ProgMetadata.end()};
348 
349  // Load the image
350  const ContextImplPtr Ctx = getSyclObjImpl(Context);
352  ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize)
353  : createBinaryProgram(Ctx, Device, RawImg.BinaryStart,
354  ImgSize, ProgMetadataVector);
355 
356  {
357  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
358  // associate the PI program with the image it was created for
359  NativePrograms[Res] = &Img;
360  }
361 
362  if (DbgProgMgr > 1)
363  std::cerr << "created program: " << Res
364  << "; image format: " << getFormatStr(Format) << "\n";
365 
366  return Res;
367 }
368 
369 static void appendLinkOptionsFromImage(std::string &LinkOpts,
370  const RTDeviceBinaryImage &Img) {
371  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
372  // Update only if link options are not overwritten by environment variable
373  if (!LinkOptsEnv) {
374  const char *TemporaryStr = Img.getLinkOptions();
375  if (TemporaryStr != nullptr) {
376  if (!LinkOpts.empty())
377  LinkOpts += " ";
378  LinkOpts += std::string(TemporaryStr);
379  }
380  }
381 }
382 
384  const char *PropName) {
385  pi_device_binary_property Prop = Img.getProperty(PropName);
386  return Prop && (pi::DeviceBinaryProperty(Prop).asUint32() != 0);
387 }
388 
389 static void appendCompileOptionsFromImage(std::string &CompileOpts,
390  const RTDeviceBinaryImage &Img) {
391  // Build options are overridden if environment variables are present.
392  // Environment variables are not changed during program lifecycle so it
393  // is reasonable to use static here to read them only once.
394  static const char *CompileOptsEnv =
396  // Update only if compile options are not overwritten by environment
397  // variable
398  if (!CompileOptsEnv) {
399  if (!CompileOpts.empty())
400  CompileOpts += " ";
401  const char *TemporaryStr = Img.getCompileOptions();
402  if (TemporaryStr != nullptr)
403  CompileOpts += std::string(TemporaryStr);
404  }
405  bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage");
406  bool isDoubleGRFEsimdImage =
407  getUint32PropAsBool(Img, "isDoubleGRFEsimdImage");
408  assert((!isDoubleGRFEsimdImage || isEsimdImage) &&
409  "doubleGRF applies only to ESIMD binary images");
410  // The -vc-codegen option is always preserved for ESIMD kernels, regardless
411  // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable.
412  if (isEsimdImage) {
413  if (!CompileOpts.empty())
414  CompileOpts += " ";
415  CompileOpts += "-vc-codegen";
416  // Allow warning and performance hints from vc/finalizer if the RT warning
417  // level is at least 1.
419  CompileOpts += " -disable-finalizer-msg";
420  }
421  if (isDoubleGRFEsimdImage) {
422  assert(!CompileOpts.empty()); // -vc-codegen must be present
423  CompileOpts += " -doubleGRF";
424  }
425 }
426 
427 static void applyOptionsFromImage(std::string &CompileOpts,
428  std::string &LinkOpts,
429  const RTDeviceBinaryImage &Img) {
430  appendCompileOptionsFromImage(CompileOpts, Img);
431  appendLinkOptionsFromImage(LinkOpts, Img);
432 }
433 
434 static void applyCompileOptionsFromEnvironment(std::string &CompileOpts) {
435  // Environment variables are not changed during program lifecycle so it
436  // is reasonable to use static here to read them only once.
437  static const char *CompileOptsEnv =
439  if (CompileOptsEnv) {
440  CompileOpts = CompileOptsEnv;
441  }
442 }
443 
444 static void applyLinkOptionsFromEnvironment(std::string &LinkOpts) {
445  // Environment variables are not changed during program lifecycle so it
446  // is reasonable to use static here to read them only once.
447  static const char *LinkOptsEnv = SYCLConfig<SYCL_PROGRAM_LINK_OPTIONS>::get();
448  if (LinkOptsEnv) {
449  LinkOpts = LinkOptsEnv;
450  }
451 }
452 
453 static void applyOptionsFromEnvironment(std::string &CompileOpts,
454  std::string &LinkOpts) {
455  // Build options are overridden if environment variables are present.
458 }
459 
460 std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
461  const RTDeviceBinaryImage &Img, const context &Context,
462  const device &Device, const std::string &CompileAndLinkOptions,
463  SerializedObj SpecConsts) {
464  RT::PiProgram NativePrg;
465 
466  auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
467  Device, Img, SpecConsts, CompileAndLinkOptions);
468  if (BinProg.size()) {
469  // Get program metadata from properties
470  auto ProgMetadata = Img.getProgramMetadata();
471  std::vector<pi_device_binary_property> ProgMetadataVector{
472  ProgMetadata.begin(), ProgMetadata.end()};
473 
474  // TODO: Build for multiple devices once supported by program manager
475  NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
476  (const unsigned char *)BinProg[0].data(),
477  BinProg[0].size(), ProgMetadataVector);
478  } else {
479  NativePrg = createPIProgram(Img, Context, Device);
480  }
481  return {NativePrg, BinProg.size()};
482 }
483 
486 static void emitBuiltProgramInfo(const pi_program &Prog,
487  const ContextImplPtr &Context) {
489  std::string ProgramBuildLog =
490  ProgramManager::getProgramBuildLog(Prog, Context);
491  std::clog << ProgramBuildLog << std::endl;
492  }
493 }
494 
495 RT::PiProgram ProgramManager::getBuiltPIProgram(
496  OSModuleHandle M, const ContextImplPtr &ContextImpl,
497  const DeviceImplPtr &DeviceImpl, const std::string &KernelName,
498  const program_impl *Prg, bool JITCompilationIsRequired) {
499  // TODO: Make sure that KSIds will be different for the case when the same
500  // kernel built with different options is present in the fat binary.
501  KernelSetId KSId = getKernelSetId(M, KernelName);
502 
503  using PiProgramT = KernelProgramCache::PiProgramT;
504  using ProgramCacheT = KernelProgramCache::ProgramCacheT;
505 
506  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
507 
508  auto AcquireF = [](KernelProgramCache &Cache) {
509  return Cache.acquireCachedPrograms();
510  };
511  auto GetF = [](const Locked<ProgramCacheT> &LockedCache) -> ProgramCacheT & {
512  return LockedCache.get();
513  };
514 
515  std::string CompileOpts;
516  std::string LinkOpts;
517  if (Prg) {
518  CompileOpts = Prg->get_build_options();
519  }
520 
521  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
522 
523  SerializedObj SpecConsts;
524  if (Prg)
525  Prg->stableSerializeSpecConstRegistry(SpecConsts);
526 
527  // Check if we can optimize program builds for sub-devices by using a program
528  // built for the root device
529  DeviceImplPtr RootDevImpl = DeviceImpl;
530  while (!RootDevImpl->isRootDevice()) {
531  auto ParentDev = detail::getSyclObjImpl(
532  RootDevImpl->get_info<info::device::parent_device>());
533  // Sharing is allowed within a single context only
534  if (!ContextImpl->hasDevice(ParentDev))
535  break;
536  RootDevImpl = ParentDev;
537  }
538 
539  pi_bool MustBuildOnSubdevice = PI_TRUE;
540  ContextImpl->getPlugin().call<PiApiKind::piDeviceGetInfo>(
541  RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
542  sizeof(pi_bool), &MustBuildOnSubdevice, nullptr);
543 
544  DeviceImplPtr Dev =
545  (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl;
546  auto BuildF = [this, &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
547  &LinkOpts, &JITCompilationIsRequired, SpecConsts] {
548  auto Context = createSyclObjFromImpl<context>(ContextImpl);
549  auto Device = createSyclObjFromImpl<device>(Dev);
550 
551  const RTDeviceBinaryImage &Img =
552  getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
553 
554  applyOptionsFromImage(CompileOpts, LinkOpts, Img);
555 
556  const detail::plugin &Plugin = ContextImpl->getPlugin();
557  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
558  Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
559 
560  if (!DeviceCodeWasInCache) {
561  if (Prg)
562  flushSpecConstants(*Prg, NativePrg, &Img);
563  if (Img.supportsSpecConstants())
564  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
565  }
566 
567  ProgramPtr ProgramManaged(
568  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
569 
570  // Link a fallback implementation of device libraries if they are not
571  // supported by a device compiler.
572  // Pre-compiled programs (after AOT compilation or read from persitent
573  // cache) are supposed to be already linked.
574  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
575  // no fallback device library will be linked.
576  uint32_t DeviceLibReqMask = 0;
577  if (!DeviceCodeWasInCache &&
580  DeviceLibReqMask = getDeviceLibReqMask(Img);
581 
582  ProgramPtr BuiltProgram =
583  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
584  getRawSyclObjImpl(Device)->getHandleRef(), DeviceLibReqMask);
585 
586  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
587 
588  {
589  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
590  NativePrograms[BuiltProgram.get()] = &Img;
591  }
592 
593  // Save program to persistent cache if it is not there
594  if (!DeviceCodeWasInCache)
595  PersistentDeviceCodeCache::putItemToDisc(
596  Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
597  return BuiltProgram.release();
598  };
599 
600  const RT::PiDevice PiDevice = Dev->getHandleRef();
601 
602  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
603  Cache,
604  std::make_pair(std::make_pair(std::move(SpecConsts), KSId),
605  std::make_pair(PiDevice, CompileOpts + LinkOpts)),
606  AcquireF, GetF, BuildF);
607  // getOrBuild is not supposed to return nullptr
608  assert(BuildResult != nullptr && "Invalid build result");
609  return BuildResult->Ptr.load();
610 }
611 
612 std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
613 ProgramManager::getOrCreateKernel(OSModuleHandle M,
614  const ContextImplPtr &ContextImpl,
615  const DeviceImplPtr &DeviceImpl,
616  const std::string &KernelName,
617  const program_impl *Prg) {
618  if (DbgProgMgr > 0) {
619  std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", "
620  << ContextImpl.get() << ", " << DeviceImpl.get() << ", "
621  << KernelName << ")\n";
622  }
623 
624  using PiKernelT = KernelProgramCache::PiKernelT;
625  using KernelCacheT = KernelProgramCache::KernelCacheT;
626  using KernelByNameT = KernelProgramCache::KernelByNameT;
627 
628  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
629 
630  std::string CompileOpts, LinkOpts;
631  SerializedObj SpecConsts;
632  if (Prg) {
633  CompileOpts = Prg->get_build_options();
634  Prg->stableSerializeSpecConstRegistry(SpecConsts);
635  }
636  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
637  const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
638 
639  auto key = std::make_tuple(std::move(SpecConsts), M, PiDevice,
640  CompileOpts + LinkOpts, KernelName);
641  auto ret_tuple = Cache.tryToGetKernelFast(key);
642  if (std::get<0>(ret_tuple))
643  return ret_tuple;
644 
645  RT::PiProgram Program =
646  getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
647 
648  auto AcquireF = [](KernelProgramCache &Cache) {
649  return Cache.acquireKernelsPerProgramCache();
650  };
651  auto GetF =
652  [&Program](const Locked<KernelCacheT> &LockedCache) -> KernelByNameT & {
653  return LockedCache.get()[Program];
654  };
655  auto BuildF = [&Program, &KernelName, &ContextImpl] {
656  PiKernelT *Result = nullptr;
657 
658  const detail::plugin &Plugin = ContextImpl->getPlugin();
659  Plugin.call<errc::kernel_not_supported, PiApiKind::piKernelCreate>(
660  Program, KernelName.c_str(), &Result);
661 
662  // Some PI Plugins (like OpenCL) require this call to enable USM
663  // For others, PI will turn this into a NOP.
665  sizeof(pi_bool), &PI_TRUE);
666 
667  return Result;
668  };
669 
670  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
671  Cache, KernelName, AcquireF, GetF, BuildF);
672  // getOrBuild is not supposed to return nullptr
673  assert(BuildResult != nullptr && "Invalid build result");
674  auto ret_val = std::make_tuple(BuildResult->Ptr.load(),
675  &(BuildResult->MBuildResultMutex), Program);
676  Cache.saveKernel(key, ret_val);
677  return ret_val;
678 }
679 
681 ProgramManager::getPiProgramFromPiKernel(RT::PiKernel Kernel,
682  const ContextImplPtr Context) {
683  RT::PiProgram Program;
684  const detail::plugin &Plugin = Context->getPlugin();
686  Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr);
687  return Program;
688 }
689 
690 std::string ProgramManager::getProgramBuildLog(const RT::PiProgram &Program,
691  const ContextImplPtr Context) {
692  size_t PIDevicesSize = 0;
693  const detail::plugin &Plugin = Context->getPlugin();
695  nullptr, &PIDevicesSize);
696  std::vector<RT::PiDevice> PIDevices(PIDevicesSize / sizeof(RT::PiDevice));
698  PIDevicesSize, PIDevices.data(),
699  nullptr);
700  std::string Log = "The program was built for " +
701  std::to_string(PIDevices.size()) + " devices";
702  for (RT::PiDevice &Device : PIDevices) {
703  std::string DeviceBuildInfoString;
704  size_t DeviceBuildInfoStrSize = 0;
706  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, 0, nullptr,
707  &DeviceBuildInfoStrSize);
708  if (DeviceBuildInfoStrSize > 0) {
709  std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
711  Program, Device, PI_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
712  DeviceBuildInfo.data(), nullptr);
713  DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
714  }
715 
716  std::string DeviceNameString;
717  size_t DeviceNameStrSize = 0;
719  nullptr, &DeviceNameStrSize);
720  if (DeviceNameStrSize > 0) {
721  std::vector<char> DeviceName(DeviceNameStrSize);
723  DeviceNameStrSize,
724  DeviceName.data(), nullptr);
725  DeviceNameString = std::string(DeviceName.data());
726  }
727  Log += "\nBuild program log for '" + DeviceNameString + "':\n" +
728  DeviceBuildInfoString;
729  }
730  return Log;
731 }
732 
733 // TODO device libraries may use scpecialization constants, manifest files, etc.
734 // To support that they need to be delivered in a different container - so that
735 // pi_device_binary_struct can be created for each of them.
736 static bool loadDeviceLib(const ContextImplPtr Context, const char *Name,
737  RT::PiProgram &Prog) {
738  std::string LibSyclDir = OSUtil::getCurrentDSODir();
739  std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
740  std::ifstream::in | std::ifstream::binary);
741  if (!File.good()) {
742  return false;
743  }
744 
745  File.seekg(0, std::ios::end);
746  size_t FileSize = File.tellg();
747  File.seekg(0, std::ios::beg);
748  std::vector<char> FileContent(FileSize);
749  File.read(&FileContent[0], FileSize);
750  File.close();
751 
752  Prog =
753  createSpirvProgram(Context, (unsigned char *)&FileContent[0], FileSize);
754  return Prog != nullptr;
755 }
756 
757 static const char *getDeviceLibFilename(DeviceLibExt Extension) {
758  switch (Extension) {
759  case DeviceLibExt::cl_intel_devicelib_assert:
760  return "libsycl-fallback-cassert.spv";
761  case DeviceLibExt::cl_intel_devicelib_math:
762  return "libsycl-fallback-cmath.spv";
763  case DeviceLibExt::cl_intel_devicelib_math_fp64:
764  return "libsycl-fallback-cmath-fp64.spv";
765  case DeviceLibExt::cl_intel_devicelib_complex:
766  return "libsycl-fallback-complex.spv";
767  case DeviceLibExt::cl_intel_devicelib_complex_fp64:
768  return "libsycl-fallback-complex-fp64.spv";
769  case DeviceLibExt::cl_intel_devicelib_cstring:
770  return "libsycl-fallback-cstring.spv";
771  case DeviceLibExt::cl_intel_devicelib_imf:
772  return "libsycl-fallback-imf.spv";
773  case DeviceLibExt::cl_intel_devicelib_imf_fp64:
774  return "libsycl-fallback-imf-fp64.spv";
775  }
776  throw compile_program_error("Unhandled (new?) device library extension",
777  PI_ERROR_INVALID_OPERATION);
778 }
779 
780 static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) {
781  switch (Extension) {
782  case DeviceLibExt::cl_intel_devicelib_assert:
783  return "cl_intel_devicelib_assert";
784  case DeviceLibExt::cl_intel_devicelib_math:
785  return "cl_intel_devicelib_math";
786  case DeviceLibExt::cl_intel_devicelib_math_fp64:
787  return "cl_intel_devicelib_math_fp64";
788  case DeviceLibExt::cl_intel_devicelib_complex:
789  return "cl_intel_devicelib_complex";
790  case DeviceLibExt::cl_intel_devicelib_complex_fp64:
791  return "cl_intel_devicelib_complex_fp64";
792  case DeviceLibExt::cl_intel_devicelib_cstring:
793  return "cl_intel_devicelib_cstring";
794  case DeviceLibExt::cl_intel_devicelib_imf:
795  return "cl_intel_devicelib_imf";
796  case DeviceLibExt::cl_intel_devicelib_imf_fp64:
797  return "cl_intel_devicelib_imf_fp64";
798  }
799  throw compile_program_error("Unhandled (new?) device library extension",
800  PI_ERROR_INVALID_OPERATION);
801 }
802 
804  DeviceLibExt Extension,
805  const RT::PiDevice &Device) {
806 
807  const char *LibFileName = getDeviceLibFilename(Extension);
808 
809  auto LockedCache = Context->acquireCachedLibPrograms();
810  auto CachedLibPrograms = LockedCache.get();
811  auto CacheResult = CachedLibPrograms.emplace(
812  std::make_pair(std::make_pair(Extension, Device), nullptr));
813  bool Cached = !CacheResult.second;
814  auto LibProgIt = CacheResult.first;
815  RT::PiProgram &LibProg = LibProgIt->second;
816 
817  if (Cached)
818  return LibProg;
819 
820  if (!loadDeviceLib(Context, LibFileName, LibProg)) {
821  CachedLibPrograms.erase(LibProgIt);
822  throw compile_program_error(std::string("Failed to load ") + LibFileName,
823  PI_ERROR_INVALID_VALUE);
824  }
825 
826  const detail::plugin &Plugin = Context->getPlugin();
827  // TODO no spec constants are used in the std libraries, support in the future
829  LibProg,
830  /*num devices = */ 1, &Device,
831  // Do not use compile options for library programs: it is not clear
832  // if user options (image options) are supposed to be applied to
833  // library program as well, and what actually happens to a SPIR-V
834  // program if we apply them.
835  "", 0, nullptr, nullptr, nullptr, nullptr);
836  if (Error != PI_SUCCESS) {
837  CachedLibPrograms.erase(LibProgIt);
838  throw compile_program_error(
839  ProgramManager::getProgramBuildLog(LibProg, Context), Error);
840  }
841 
842  return LibProg;
843 }
844 
845 ProgramManager::ProgramManager() {
846  const char *SpvFile = std::getenv(UseSpvEnv);
847  // If a SPIR-V file is specified with an environment variable,
848  // register the corresponding image
849  if (SpvFile) {
850  m_UseSpvFile = true;
851  // The env var requests that the program is loaded from a SPIR-V file on
852  // disk
853  std::ifstream File(SpvFile, std::ios::binary);
854 
855  if (!File.is_open())
856  throw runtime_error(std::string("Can't open file specified via ") +
857  UseSpvEnv + ": " + SpvFile,
858  PI_ERROR_INVALID_VALUE);
859  File.seekg(0, std::ios::end);
860  size_t Size = File.tellg();
861  std::unique_ptr<char[]> Data(new char[Size]);
862  File.seekg(0);
863  File.read(Data.get(), Size);
864  File.close();
865  if (!File.good())
866  throw runtime_error(std::string("read from ") + SpvFile +
867  std::string(" failed"),
868  PI_ERROR_INVALID_VALUE);
869  auto ImgPtr = make_unique_ptr<DynRTDeviceBinaryImage>(
870  std::move(Data), Size, OSUtil::DummyModuleHandle);
871 
872  if (DbgProgMgr > 0) {
873  std::cerr << "loaded device image binary from " << SpvFile << "\n";
874  std::cerr << "format: " << getFormatStr(ImgPtr->getFormat()) << "\n";
875  }
876  // No need for a mutex here since all access to these private fields is
877  // blocked until the construction of the ProgramManager singleton is
878  // finished.
879  m_DeviceImages[SpvFileKSId].reset(
880  new std::vector<RTDeviceBinaryImageUPtr>());
881  m_DeviceImages[SpvFileKSId]->push_back(std::move(ImgPtr));
882  }
883 }
884 
886 ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId,
887  const context &Context, const device &Device,
888  bool JITCompilationIsRequired) {
889  if (DbgProgMgr > 0) {
890  std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" << KSId
891  << "\", " << getRawSyclObjImpl(Context) << ", "
892  << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired
893  << ")\n";
894 
895  std::cerr << "available device images:\n";
896  debugPrintBinaryImages();
897  }
898  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
899  std::vector<RTDeviceBinaryImageUPtr> &Imgs = *m_DeviceImages[KSId];
900  const ContextImplPtr Ctx = getSyclObjImpl(Context);
901  pi_uint32 ImgInd = 0;
902  RTDeviceBinaryImage *Img = nullptr;
903 
904  // TODO: There may be cases with cl::sycl::program class usage in source code
905  // that will result in a multi-device context. This case needs to be handled
906  // here or at the program_impl class level
907 
908  // Ask the native runtime under the given context to choose the device image
909  // it prefers.
910  std::vector<pi_device_binary> RawImgs(Imgs.size());
911  for (unsigned I = 0; I < Imgs.size(); I++)
912  RawImgs[I] = const_cast<pi_device_binary>(&Imgs[I]->getRawData());
913 
914  Ctx->getPlugin().call<PiApiKind::piextDeviceSelectBinary>(
915  getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
916  (pi_uint32)RawImgs.size(), &ImgInd);
917 
918  if (JITCompilationIsRequired) {
919  // If the image is already compiled with AOT, throw an exception.
920  const pi_device_binary_struct &RawImg = Imgs[ImgInd]->getRawData();
921  if ((strcmp(RawImg.DeviceTargetSpec,
923  (strcmp(RawImg.DeviceTargetSpec,
925  (strcmp(RawImg.DeviceTargetSpec,
927  throw feature_not_supported("Recompiling AOT image is not supported",
928  PI_ERROR_INVALID_OPERATION);
929  }
930  }
931 
932  Img = Imgs[ImgInd].get();
933 
934  if (DbgProgMgr > 0) {
935  std::cerr << "selected device image: " << &Img->getRawData() << "\n";
936  Img->print();
937  }
938  return *Img;
939 }
940 
941 static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) {
942  uint32_t Mask =
943  0x1 << (static_cast<uint32_t>(Ext) -
944  static_cast<uint32_t>(DeviceLibExt::cl_intel_devicelib_assert));
945  return ((DeviceLibReqMask & Mask) == Mask);
946 }
947 
948 static std::vector<RT::PiProgram>
950  uint32_t DeviceLibReqMask) {
951  std::vector<RT::PiProgram> Programs;
952 
953  std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
954  {DeviceLibExt::cl_intel_devicelib_assert,
955  /* is fallback loaded? */ false},
956  {DeviceLibExt::cl_intel_devicelib_math, false},
957  {DeviceLibExt::cl_intel_devicelib_math_fp64, false},
958  {DeviceLibExt::cl_intel_devicelib_complex, false},
959  {DeviceLibExt::cl_intel_devicelib_complex_fp64, false},
960  {DeviceLibExt::cl_intel_devicelib_cstring, false},
961  {DeviceLibExt::cl_intel_devicelib_imf, false},
962  {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}};
963 
964  // Disable all devicelib extensions requiring fp64 support if at least
965  // one underlying device doesn't support cl_khr_fp64.
966  std::string DevExtList =
968  Device, Context->getPlugin());
969  const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64"));
970 
971  // Load a fallback library for an extension if the device does not
972  // support it.
973  for (auto &Pair : RequiredDeviceLibExt) {
974  DeviceLibExt Ext = Pair.first;
975  bool &FallbackIsLoaded = Pair.second;
976 
977  if (FallbackIsLoaded) {
978  continue;
979  }
980 
981  if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) {
982  continue;
983  }
984  if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 ||
985  Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) &&
986  !fp64Support) {
987  continue;
988  }
989 
990  const char *ExtStr = getDeviceLibExtensionStr(Ext);
991 
992  bool InhibitNativeImpl = false;
993  if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) {
994  InhibitNativeImpl = strstr(Env, ExtStr) != nullptr;
995  }
996 
997  bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr);
998 
999  if (!DeviceSupports || InhibitNativeImpl) {
1000  Programs.push_back(loadDeviceLibFallback(Context, Ext, Device));
1001  FallbackIsLoaded = true;
1002  }
1003  }
1004  return Programs;
1005 }
1006 
1007 ProgramManager::ProgramPtr
1008 ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
1009  const std::string &CompileOptions,
1010  const std::string &LinkOptions,
1011  const RT::PiDevice &Device, uint32_t DeviceLibReqMask) {
1012 
1013  if (DbgProgMgr > 0) {
1014  std::cerr << ">>> ProgramManager::build(" << Program.get() << ", "
1015  << CompileOptions << ", " << LinkOptions << ", ... " << Device
1016  << ")\n";
1017  }
1018 
1019  // TODO: old sycl compiler always marks cassert fallback device library as
1020  // "required", this will lead to compatibilty issue when we enable online
1021  // link in SYCL runtime. If users compile their code with old compiler and run
1022  // their executable with latest SYCL runtime, cassert fallback spv file will
1023  // always be loaded which is not expected, cassert device library development
1024  // is still in progress, the unexpected loading may lead to runtime problem.
1025  // So, we clear bit 0 in device library require mask to avoid loading cassert
1026  // fallback device library and will revert this when cassert development is
1027  // done.
1028  DeviceLibReqMask &= 0xFFFFFFFE;
1029  bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1030 
1031  // TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
1032  // We do not link with other device libraries, because it may fail
1033  // due to unrecognized SPIR-V format of those libraries.
1034  if (CompileOptions.find(std::string("-cmc")) != std::string::npos ||
1035  CompileOptions.find(std::string("-vc-codegen")) != std::string::npos)
1036  LinkDeviceLibs = false;
1037 
1038  std::vector<RT::PiProgram> LinkPrograms;
1039  if (LinkDeviceLibs) {
1040  LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask);
1041  }
1042 
1043  static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK");
1044  static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1');
1045 
1046  const detail::plugin &Plugin = Context->getPlugin();
1047  if (LinkPrograms.empty() && !ForceLink) {
1048  const std::string &Options = LinkOptions.empty()
1049  ? CompileOptions
1050  : (CompileOptions + " " + LinkOptions);
1051  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramBuild>(
1052  Program.get(), /*num devices =*/1, &Device, Options.c_str(), nullptr,
1053  nullptr);
1054  if (Error != PI_SUCCESS)
1055  throw compile_program_error(getProgramBuildLog(Program.get(), Context),
1056  Error);
1057  return Program;
1058  }
1059 
1060  // Include the main program and compile/link everything together
1061  Plugin.call<PiApiKind::piProgramCompile>(Program.get(), /*num devices =*/1,
1062  &Device, CompileOptions.c_str(), 0,
1063  nullptr, nullptr, nullptr, nullptr);
1064  LinkPrograms.push_back(Program.get());
1065 
1066  RT::PiProgram LinkedProg = nullptr;
1067  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piProgramLink>(
1068  Context->getHandleRef(), /*num devices =*/1, &Device, LinkOptions.c_str(),
1069  LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg);
1070 
1071  // Link program call returns a new program object if all parameters are valid,
1072  // or NULL otherwise. Release the original (user) program.
1073  Program.reset(LinkedProg);
1074  if (Error != PI_SUCCESS) {
1075  if (LinkedProg) {
1076  // A non-trivial error occurred during linkage: get a build log, release
1077  // an incomplete (but valid) LinkedProg, and throw.
1078  throw compile_program_error(getProgramBuildLog(LinkedProg, Context),
1079  Error);
1080  }
1081  Plugin.checkPiResult(Error);
1082  }
1083  return Program;
1084 }
1085 
1086 static ProgramManager::KernelArgMask
1088  const int NBytesForSize = 8;
1089  const int NBitsInElement = 8;
1090  std::uint64_t SizeInBits = 0;
1091  for (int I = 0; I < NBytesForSize; ++I)
1092  SizeInBits |= static_cast<std::uint64_t>(Bytes[I]) << I * NBitsInElement;
1093 
1095  for (std::uint64_t I = 0; I < SizeInBits; ++I) {
1096  std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
1097  Result.push_back(Byte & (1 << (I % NBitsInElement)));
1098  }
1099 
1100  return Result;
1101 }
1102 
1103 void ProgramManager::cacheKernelUsesAssertInfo(OSModuleHandle M,
1104  RTDeviceBinaryImage &Img) {
1105  const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange =
1106  Img.getAssertUsed();
1107  if (AssertUsedRange.isAvailable())
1108  for (const auto &Prop : AssertUsedRange) {
1109  KernelNameWithOSModule Key{Prop->Name, M};
1110  m_KernelUsesAssert.insert(Key);
1111  }
1112 }
1113 
1114 bool ProgramManager::kernelUsesAssert(OSModuleHandle M,
1115  const std::string &KernelName) const {
1116  KernelNameWithOSModule Key{KernelName, M};
1117  return m_KernelUsesAssert.find(Key) != m_KernelUsesAssert.end();
1118 }
1119 
1120 void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
1121  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1122  const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1123 
1124  for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) {
1125  pi_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]);
1126  OSModuleHandle M = OSUtil::getOSModuleHandle(RawImg);
1127  const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
1128  const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
1129  auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
1130 
1131  // Fill the kernel argument mask map
1132  const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
1133  Img->getKernelParamOptInfo();
1134  if (KPOIRange.isAvailable()) {
1135  KernelNameToArgMaskMap &ArgMaskMap =
1136  m_EliminatedKernelArgMasks[Img.get()];
1137  for (const auto &Info : KPOIRange)
1138  ArgMaskMap[Info->Name] =
1139  createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
1140  }
1141 
1142  // Fill maps for kernel bundles
1143  if (EntriesB != EntriesE) {
1144  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1145 
1146  // Register all exported symbols
1147  auto ExportedSymbols = Img->getExportedSymbols();
1148  for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
1149  m_ExportedSymbols.insert(ExportedSymbol->Name);
1150 
1151  m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
1152 
1153  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1154  ++EntriesIt) {
1155 
1156  // Skip creating unique kernel ID if it is a service kernel.
1157  // SYCL service kernels are identified by having
1158  // __sycl_service_kernel__ in the mangled name, primarily as part of
1159  // the namespace of the name type.
1160  if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1161  m_ServiceKernels.insert(EntriesIt->name);
1162  continue;
1163  }
1164 
1165  // Skip creating unique kernel ID if it is an exported device
1166  // function. Exported device functions appear in the offload entries
1167  // among kernels, but are identifiable by being listed in properties.
1168  if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1169  continue;
1170 
1171  // ... and create a unique kernel ID for the entry
1172  auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1173  if (It == m_KernelName2KernelIDs.end()) {
1174  std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1175  std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1176  sycl::kernel_id KernelID =
1177  detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1178 
1179  It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name,
1180  KernelID);
1181  }
1182 
1183  m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1184  m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1185  }
1186 
1187  // Sort kernel ids for faster search
1188  std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1189  m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
1190  }
1191 
1192  // TODO: Remove the code below once program manager works trought kernel
1193  // bundles only
1194  // Use the entry information if it's available
1195  if (EntriesB != EntriesE) {
1196  // The kernel sets for any pair of images are either disjoint or
1197  // identical, look up the kernel set using the first kernel name...
1198  StrToKSIdMap &KSIdMap = m_KernelSets[M];
1199  auto KSIdIt = KSIdMap.find(EntriesB->name);
1200  if (KSIdIt != KSIdMap.end()) {
1201  for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE;
1202  ++EntriesIt)
1203  assert(KSIdMap[EntriesIt->name] == KSIdIt->second &&
1204  "Kernel sets are not disjoint");
1205  auto &Imgs = m_DeviceImages[KSIdIt->second];
1206  assert(Imgs && "Device image vector should have been already created");
1207 
1208  cacheKernelUsesAssertInfo(M, *Img);
1209 
1210  Imgs->push_back(std::move(Img));
1211  continue;
1212  }
1213  // ... or create the set first if it hasn't been
1214  KernelSetId KSId = getNextKernelSetId();
1215  {
1216  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1217 
1218  for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1219  ++EntriesIt) {
1220  KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
1221  }
1222  }
1223  // ... and initialize associated device_global information
1224  {
1225  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1226 
1227  auto DeviceGlobals = Img->getDeviceGlobals();
1228  for (const pi_device_binary_property &DeviceGlobal : DeviceGlobals) {
1229  pi::ByteArray DeviceGlobalInfo =
1230  pi::DeviceBinaryProperty(DeviceGlobal).asByteArray();
1231 
1232  // The supplied device_global info property is expected to contain:
1233  // * 8 bytes - Size of the property.
1234  // * 4 bytes - Size of the underlying type in the device_global.
1235  // * 4 bytes - 0 if device_global has device_image_scope and any value
1236  // otherwise.
1237  assert(DeviceGlobalInfo.size() == 16 && "Unexpected property size");
1238  const std::uint32_t TypeSize =
1239  *reinterpret_cast<const std::uint32_t *>(&DeviceGlobalInfo[8]);
1240  const std::uint32_t DeviceImageScopeDecorated =
1241  *reinterpret_cast<const std::uint32_t *>(&DeviceGlobalInfo[12]);
1242 
1243  auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1244  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1245  // If it has already been registered we update the information.
1246  ExistingDeviceGlobal->second->initialize(TypeSize,
1247  DeviceImageScopeDecorated);
1248  } else {
1249  // If it has not already been registered we create a new entry.
1250  // Note: Pointer to the device global is not available here, so it
1251  // cannot be set until registration happens.
1252  auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1253  DeviceGlobal->Name, TypeSize, DeviceImageScopeDecorated);
1254  m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1255  }
1256  }
1257  }
1258  m_DeviceImages[KSId].reset(new std::vector<RTDeviceBinaryImageUPtr>());
1259  cacheKernelUsesAssertInfo(M, *Img);
1260 
1261  if (DumpImages)
1262  dumpImage(*Img, KSId);
1263  m_DeviceImages[KSId]->push_back(std::move(Img));
1264 
1265  continue;
1266  }
1267  // Otherwise assume that the image contains all kernels associated with the
1268  // module
1269  KernelSetId &KSId = m_OSModuleKernelSets[M];
1270  if (KSId == 0)
1271  KSId = getNextKernelSetId();
1272 
1273  auto &Imgs = m_DeviceImages[KSId];
1274  if (!Imgs)
1275  Imgs.reset(new std::vector<RTDeviceBinaryImageUPtr>());
1276 
1277  cacheKernelUsesAssertInfo(M, *Img);
1278 
1279  if (DumpImages)
1280  dumpImage(*Img, KSId);
1281  Imgs->push_back(std::move(Img));
1282  }
1283 }
1284 
1285 void ProgramManager::debugPrintBinaryImages() const {
1286  for (const auto &ImgVecIt : m_DeviceImages) {
1287  std::cerr << " ++++++ Kernel set: " << ImgVecIt.first << "\n";
1288  for (const auto &Img : *ImgVecIt.second)
1289  Img.get()->print();
1290  }
1291 }
1292 
1293 KernelSetId ProgramManager::getNextKernelSetId() const {
1294  // No need for atomic, should be guarded by the caller
1295  static KernelSetId Result = LastKSId;
1296  return ++Result;
1297 }
1298 
1300 ProgramManager::getKernelSetId(OSModuleHandle M,
1301  const std::string &KernelName) const {
1302  // If the env var instructs to use image from a file,
1303  // return the kernel set associated with it
1304  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1305  return SpvFileKSId;
1306  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1307  auto KSIdMapIt = m_KernelSets.find(M);
1308  if (KSIdMapIt != m_KernelSets.end()) {
1309  const StrToKSIdMap &KSIdMap = KSIdMapIt->second;
1310  auto KSIdIt = KSIdMap.find(KernelName);
1311  // If the kernel has been assigned to a kernel set, return it
1312  if (KSIdIt != KSIdMap.end())
1313  return KSIdIt->second;
1314  }
1315  // If no kernel set was found check if there is a kernel set containing
1316  // all kernels in the given module
1317  auto ModuleKSIdIt = m_OSModuleKernelSets.find(M);
1318  if (ModuleKSIdIt != m_OSModuleKernelSets.end())
1319  return ModuleKSIdIt->second;
1320 
1321  throw runtime_error("No kernel named " + KernelName + " was found",
1322  PI_ERROR_INVALID_KERNEL_NAME);
1323 }
1324 
1325 void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img,
1326  KernelSetId KSId) const {
1327  std::string Fname("sycl_");
1328  const pi_device_binary_struct &RawImg = Img.getRawData();
1329  Fname += RawImg.DeviceTargetSpec;
1330  Fname += std::to_string(KSId);
1331  std::string Ext;
1332 
1333  RT::PiDeviceBinaryType Format = Img.getFormat();
1334  if (Format == PI_DEVICE_BINARY_TYPE_SPIRV)
1335  Ext = ".spv";
1336  else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE)
1337  Ext = ".bc";
1338  else
1339  Ext = ".bin";
1340  Fname += Ext;
1341 
1342  std::ofstream F(Fname, std::ios::binary);
1343 
1344  if (!F.is_open()) {
1345  throw runtime_error("Can not write " + Fname, PI_ERROR_UNKNOWN);
1346  }
1347  Img.dump(F);
1348  F.close();
1349 }
1350 
1351 void ProgramManager::flushSpecConstants(const program_impl &Prg,
1352  RT::PiProgram NativePrg,
1353  const RTDeviceBinaryImage *Img) {
1354  if (DbgProgMgr > 2) {
1355  std::cerr << ">>> ProgramManager::flushSpecConstants(" << Prg.get()
1356  << ",...)\n";
1357  }
1358  if (!Prg.hasSetSpecConstants())
1359  return; // nothing to do
1360  pi::PiProgram PrgHandle = Prg.getHandleRef();
1361  // program_impl can't correspond to two different native programs
1362  assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1363  NativePrg = NativePrg ? NativePrg : PrgHandle;
1364 
1365  if (!Img) {
1366  // caller hasn't provided the image object - find it
1367  { // make sure NativePrograms map access is synchronized
1368  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1369  auto It = NativePrograms.find(NativePrg);
1370  if (It == NativePrograms.end())
1372  "spec constant is set in a program w/o a binary image",
1373  PI_ERROR_INVALID_OPERATION);
1374  Img = It->second;
1375  }
1376  if (!Img->supportsSpecConstants()) {
1377  if (DbgProgMgr > 0)
1378  std::cerr << ">>> ProgramManager::flushSpecConstants: binary image "
1379  << &Img->getRawData() << " doesn't support spec constants\n";
1380  // This device binary image does not support runtime setting of
1381  // specialization constants; compiler must have generated default values.
1382  // NOTE: Can't throw here, as it would always take place with AOT
1383  //-compiled code. New Khronos 2020 spec should fix this inconsistency.
1384  return;
1385  }
1386  }
1387  Prg.flush_spec_constants(*Img, NativePrg);
1388 }
1389 
1390 // If the kernel is loaded from spv file, it may not include DeviceLib require
1391 // mask, sycl runtime won't know which fallback device libraries are needed. In
1392 // such case, the safest way is to load all fallback device libraries.
1393 uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
1394  const pi::DeviceBinaryImage::PropertyRange &DLMRange =
1395  Img.getDeviceLibReqMask();
1396  if (DLMRange.isAvailable())
1397  return pi::DeviceBinaryProperty(*(DLMRange.begin())).asUint32();
1398  else
1399  return 0xFFFFFFFF;
1400 }
1401 
1402 // TODO consider another approach with storing the masks in the integration
1403 // header instead.
1404 ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
1405  OSModuleHandle M, pi::PiProgram NativePrg, const std::string &KernelName) {
1406  // If instructed to use a spv file, assume no eliminated arguments.
1407  if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1408  return {};
1409 
1410  // Bail out if there are no eliminated kernel arg masks in our images
1411  if (m_EliminatedKernelArgMasks.empty())
1412  return {};
1413 
1414  {
1415  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1416  auto ImgIt = NativePrograms.find(NativePrg);
1417  if (ImgIt != NativePrograms.end()) {
1418  auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1419  if (MapIt != m_EliminatedKernelArgMasks.end())
1420  return MapIt->second[KernelName];
1421  return {};
1422  }
1423  }
1424 
1425  // If the program was not cached iterate over all available images looking for
1426  // the requested kernel
1427  for (auto &Elem : m_EliminatedKernelArgMasks) {
1428  auto ArgMask = Elem.second.find(KernelName);
1429  if (ArgMask != Elem.second.end())
1430  return ArgMask->second;
1431  }
1432 
1433  // The kernel is not generated by DPCPP stack, so a mask doesn't exist for it
1434  return {};
1435 }
1436 
1438  auto IsAOTBinary = [](const char *Format) {
1439  return (
1440  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) ||
1441  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) ||
1442  (strcmp(Format, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0));
1443  };
1444 
1445  // There are only two initial states so far - SPIRV which needs to be compiled
1446  // and linked and fully compiled(AOTed) binary
1447 
1448  const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec);
1449 
1450  return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1451 }
1452 
1454  const device &Dev) {
1455  const std::shared_ptr<detail::device_impl> &DeviceImpl =
1457  auto &Plugin = DeviceImpl->getPlugin();
1458 
1459  const RT::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1460 
1461  // Call piextDeviceSelectBinary with only one image to check if an image is
1462  // compatible with implementation. The function returns invalid index if no
1463  // device images are compatible.
1464  pi_uint32 SuitableImageID = std::numeric_limits<pi_uint32>::max();
1465  pi_device_binary DevBin =
1466  const_cast<pi_device_binary>(&BinImage->getRawData());
1467  RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piextDeviceSelectBinary>(
1468  PIDeviceHandle, &DevBin,
1469  /*num bin images = */ (pi_uint32)1, &SuitableImageID);
1470  if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1471  throw runtime_error("Invalid binary image or device",
1472  PI_ERROR_INVALID_VALUE);
1473 
1474  return (0 == SuitableImageID);
1475 }
1476 
1477 kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
1478  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1479 
1480  auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1481  if (KernelID == m_KernelName2KernelIDs.end())
1482  throw runtime_error("No kernel found with the specified name",
1483  PI_ERROR_INVALID_KERNEL_NAME);
1484 
1485  return KernelID->second;
1486 }
1487 
1488 bool ProgramManager::hasCompatibleImage(const device &Dev) {
1489  std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1490 
1491  return std::any_of(
1492  m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1493  [&](std::pair<RTDeviceBinaryImage *,
1494  std::shared_ptr<std::vector<kernel_id>>>
1495  Elem) { return compatibleWithDevice(Elem.first, Dev); });
1496 }
1497 
1498 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1499  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1500 
1501  std::vector<sycl::kernel_id> AllKernelIDs;
1502  AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1503  for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1504  AllKernelIDs.push_back(KernelID.second);
1505  }
1506  return AllKernelIDs;
1507 }
1508 
1509 kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
1510  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1511 
1512  auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1513  if (KernelID == m_BuiltInKernelIDs.end()) {
1514  auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1515  auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1516  KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1517  }
1518 
1519  return KernelID->second;
1520 }
1521 
1522 void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
1523  const char *UniqueId) {
1524  std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1525 
1526  auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1527  if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1528  // Update the existing information and add the entry to the pointer map.
1529  ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1530  m_Ptr2DeviceGlobal.insert(
1531  {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1532  return;
1533  }
1534 
1535  auto EntryUPtr =
1536  std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1537  auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1538  m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1539 }
1540 
1541 std::vector<device_image_plain>
1542 ProgramManager::getSYCLDeviceImagesWithCompatibleState(
1543  const context &Ctx, const std::vector<device> &Devs,
1544  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {
1545 
1546  // Collect unique raw device images taking into account kernel ids passed
1547  // TODO: Can we avoid repacking?
1548  std::set<RTDeviceBinaryImage *> BinImages;
1549  if (!KernelIDs.empty()) {
1550  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1551  for (const kernel_id &KID : KernelIDs) {
1552  auto Range = m_KernelIDs2BinImage.equal_range(KID);
1553  for (auto It = Range.first, End = Range.second; It != End; ++It)
1554  BinImages.insert(It->second);
1555  }
1556  } else {
1557  std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1558  for (auto &ImagesSets : m_DeviceImages) {
1559  auto &ImagesUPtrs = *ImagesSets.second.get();
1560  for (auto &ImageUPtr : ImagesUPtrs)
1561  BinImages.insert(ImageUPtr.get());
1562  }
1563  }
1564  assert(BinImages.size() > 0 && "Expected to find at least on device image");
1565 
1566  std::vector<device_image_plain> SYCLDeviceImages;
1567  for (RTDeviceBinaryImage *BinImage : BinImages) {
1568  const bundle_state ImgState = getBinImageState(BinImage);
1569 
1570  // Ignore images with incompatible state. Image is considered compatible
1571  // with a target state if an image is already in the target state or can
1572  // be brought to target state by compiling/linking/building.
1573  //
1574  // Example: an image in "executable" state is not compatible with
1575  // "input" target state - there is no operation to convert the image it
1576  // to "input" state. An image in "input" state is compatible with
1577  // "executable" target state because it can be built to get into
1578  // "executable" state.
1579  if (ImgState > TargetState)
1580  continue;
1581 
1582  for (const sycl::device &Dev : Devs) {
1583  if (!compatibleWithDevice(BinImage, Dev))
1584  continue;
1585 
1586  std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1587  // Collect kernel names for the image
1588  {
1589  std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1590  KernelIDs = m_BinImg2KernelIDs[BinImage];
1591  // If the image does not contain any non-service kernels we can skip it.
1592  if (!KernelIDs || KernelIDs->empty())
1593  continue;
1594  }
1595 
1596  DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
1597  BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
1598 
1599  SYCLDeviceImages.push_back(
1600  createSyclObjFromImpl<device_image_plain>(Impl));
1601  break;
1602  }
1603  }
1604 
1605  return SYCLDeviceImages;
1606 }
1607 
1608 void ProgramManager::bringSYCLDeviceImagesToState(
1609  std::vector<device_image_plain> &DeviceImages, bundle_state TargetState) {
1610 
1611  for (device_image_plain &DevImage : DeviceImages) {
1612  const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state();
1613 
1614  switch (TargetState) {
1615  case bundle_state::input:
1616  // Do nothing since there is no state which can be upgraded to the input.
1617  assert(DevImageState == bundle_state::input);
1618  break;
1619  case bundle_state::object:
1620  if (DevImageState == bundle_state::input) {
1621  DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1622  /*PropList=*/{});
1623  break;
1624  }
1625  // Device image is expected to be object state then.
1626  assert(DevImageState == bundle_state::object);
1627  break;
1628  case bundle_state::executable: {
1629  switch (DevImageState) {
1630  case bundle_state::input:
1631  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1632  /*PropList=*/{});
1633  break;
1634  case bundle_state::object: {
1635  std::vector<device_image_plain> LinkedDevImages =
1636  link({DevImage}, getSyclObjImpl(DevImage)->get_devices(),
1637  /*PropList=*/{});
1638  // Since only one device image is passed here one output device image is
1639  // expected
1640  assert(LinkedDevImages.size() == 1 && "Expected one linked image here");
1641  DevImage = LinkedDevImages[0];
1642  break;
1643  }
1644  case bundle_state::executable:
1645  DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1646  /*PropList=*/{});
1647  break;
1648  }
1649  break;
1650  }
1651  }
1652  }
1653 }
1654 
1655 std::vector<device_image_plain>
1656 ProgramManager::getSYCLDeviceImages(const context &Ctx,
1657  const std::vector<device> &Devs,
1658  bundle_state TargetState) {
1659  // Collect device images with compatible state
1660  std::vector<device_image_plain> DeviceImages =
1661  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1662  // Brind device images with compatible state to desired state
1663  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1664  return DeviceImages;
1665 }
1666 
1667 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1668  const context &Ctx, const std::vector<device> &Devs,
1669  const DevImgSelectorImpl &Selector, bundle_state TargetState) {
1670  // Collect device images with compatible state
1671  std::vector<device_image_plain> DeviceImages =
1672  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1673 
1674  // Filter out images that are rejected by Selector
1675  auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1676  [&Selector](const device_image_plain &Image) {
1677  return !Selector(getSyclObjImpl(Image));
1678  });
1679  DeviceImages.erase(It, DeviceImages.end());
1680 
1681  // The spec says that the function should not call online compiler or linker
1682  // to translate device images into target state
1683  return DeviceImages;
1684 }
1685 
1686 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1687  const context &Ctx, const std::vector<device> &Devs,
1688  const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1689  // Fast path for when no kernel IDs are requested
1690  if (KernelIDs.empty())
1691  return {};
1692 
1693  {
1694  std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1695 
1696  for (auto &It : m_BuiltInKernelIDs) {
1697  if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1698  KernelIDs.end())
1699  throw sycl::exception(make_error_code(errc::kernel_argument),
1700  "Attempting to use a built-in kernel. They are "
1701  "not fully supported");
1702  }
1703  }
1704 
1705  // Collect device images with compatible state
1706  std::vector<device_image_plain> DeviceImages =
1707  getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1708 
1709  // Brind device images with compatible state to desired state
1710  bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1711  return DeviceImages;
1712 }
1713 
1716  const std::vector<device> &Devs,
1717  const property_list &) {
1718 
1719  // TODO: Extract compile options from property list once the Spec clarifies
1720  // how they can be passed.
1721 
1722  // TODO: Probably we could have cached compiled device images.
1723  const std::shared_ptr<device_image_impl> &InputImpl =
1724  getSyclObjImpl(DeviceImage);
1725 
1726  const detail::plugin &Plugin =
1727  getSyclObjImpl(InputImpl->get_context())->getPlugin();
1728 
1729  // TODO: Add support for creating non-SPIRV programs from multiple devices.
1730  if (InputImpl->get_bin_image_ref()->getFormat() !=
1732  Devs.size() > 1)
1733  sycl::runtime_error(
1734  "Creating a program from AOT binary for multiple device is not "
1735  "supported",
1736  PI_ERROR_INVALID_OPERATION);
1737 
1738  // Device is not used when creating program from SPIRV, so passing only one
1739  // device is OK.
1740  RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
1741  InputImpl->get_context(), Devs[0]);
1742 
1743  if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1744  enableITTAnnotationsIfNeeded(Prog, Plugin);
1745 
1746  DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
1747  InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
1748  bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
1749  InputImpl->get_spec_const_data_ref(),
1750  InputImpl->get_spec_const_blob_ref());
1751 
1752  std::vector<pi_device> PIDevices;
1753  PIDevices.reserve(Devs.size());
1754  for (const device &Dev : Devs)
1755  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
1756 
1757  // TODO: Set spec constatns here.
1758 
1759  // TODO: Handle zero sized Device list.
1760  std::string CompileOptions;
1761  applyCompileOptionsFromEnvironment(CompileOptions);
1762  appendCompileOptionsFromImage(CompileOptions,
1763  *(InputImpl->get_bin_image_ref()));
1765  ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(),
1766  PIDevices.data(), CompileOptions.c_str(),
1767  /*num_input_headers=*/0, /*input_headers=*/nullptr,
1768  /*header_include_names=*/nullptr,
1769  /*pfn_notify=*/nullptr, /*user_data*/ nullptr);
1770  if (Error != PI_SUCCESS)
1771  throw sycl::exception(
1773  getProgramBuildLog(ObjectImpl->get_program_ref(),
1774  getSyclObjImpl(ObjectImpl->get_context())));
1775 
1776  return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
1777 }
1778 
1779 std::vector<device_image_plain>
1780 ProgramManager::link(const std::vector<device_image_plain> &DeviceImages,
1781  const std::vector<device> &Devs,
1782  const property_list &PropList) {
1783  (void)PropList;
1784 
1785  std::vector<pi_program> PIPrograms;
1786  PIPrograms.reserve(DeviceImages.size());
1787  for (const device_image_plain &DeviceImage : DeviceImages)
1788  PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref());
1789 
1790  std::vector<pi_device> PIDevices;
1791  PIDevices.reserve(Devs.size());
1792  for (const device &Dev : Devs)
1793  PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef());
1794 
1795  std::string LinkOptionsStr;
1796  applyLinkOptionsFromEnvironment(LinkOptionsStr);
1797  if (LinkOptionsStr.empty()) {
1798  for (const device_image_plain &DeviceImage : DeviceImages) {
1799  const std::shared_ptr<device_image_impl> &InputImpl =
1800  getSyclObjImpl(DeviceImage);
1801  appendLinkOptionsFromImage(LinkOptionsStr,
1802  *(InputImpl->get_bin_image_ref()));
1803  }
1804  }
1805  const context &Context = getSyclObjImpl(DeviceImages[0])->get_context();
1806  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1807  const detail::plugin &Plugin = ContextImpl->getPlugin();
1808 
1809  RT::PiProgram LinkedProg = nullptr;
1811  ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
1812  /*options=*/LinkOptionsStr.c_str(), PIPrograms.size(), PIPrograms.data(),
1813  /*pfn_notify=*/nullptr,
1814  /*user_data=*/nullptr, &LinkedProg);
1815 
1816  if (Error != PI_SUCCESS) {
1817  if (LinkedProg) {
1818  const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
1819  throw sycl::exception(make_error_code(errc::build), ErrorMsg);
1820  }
1821  Plugin.reportPiError(Error, "link()");
1822  }
1823 
1824  std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
1825  for (const device_image_plain &DeviceImage : DeviceImages) {
1826  // Duplicates are not expected here, otherwise piProgramLink should fail
1827  KernelIDs->insert(
1828  KernelIDs->end(),
1829  getSyclObjImpl(DeviceImage)->get_kernel_ids_ptr()->begin(),
1830  getSyclObjImpl(DeviceImage)->get_kernel_ids_ptr()->end());
1831  }
1832  // device_image_impl expects kernel ids to be sorted for fast search
1833  std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});
1834 
1835  DeviceImageImplPtr ExecutableImpl =
1836  std::make_shared<detail::device_image_impl>(
1837  /*BinImage=*/nullptr, Context, Devs, bundle_state::executable,
1838  std::move(KernelIDs), LinkedProg);
1839 
1840  // TODO: Make multiple sets of device images organized by devices they are
1841  // compiled for.
1842  return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
1843 }
1844 
1845 // The function duplicates most of the code from existing getBuiltPIProgram.
1846 // The differences are:
1847 // Different API - uses different objects to extract required info
1848 // Supports caching of a program built for multiple devices
1850  const std::vector<device> &Devs,
1851  const property_list &PropList) {
1852  (void)PropList;
1853 
1854  const std::shared_ptr<device_image_impl> &InputImpl =
1855  getSyclObjImpl(DeviceImage);
1856 
1857  const context Context = InputImpl->get_context();
1858 
1859  const ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1860 
1861  using PiProgramT = KernelProgramCache::PiProgramT;
1862  using ProgramCacheT = KernelProgramCache::ProgramCacheT;
1863 
1864  KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();
1865 
1866  auto AcquireF = [](KernelProgramCache &Cache) {
1867  return Cache.acquireCachedPrograms();
1868  };
1869  auto GetF = [](const Locked<ProgramCacheT> &LockedCache) -> ProgramCacheT & {
1870  return LockedCache.get();
1871  };
1872 
1873  std::string CompileOpts;
1874  std::string LinkOpts;
1875  applyOptionsFromEnvironment(CompileOpts, LinkOpts);
1876 
1877  const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
1878  const RTDeviceBinaryImage &Img = *ImgPtr;
1879 
1880  SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
1881 
1882  // TODO: Unify this code with getBuiltPIProgram
1883  auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
1884  &InputImpl, SpecConsts] {
1885  applyOptionsFromImage(CompileOpts, LinkOpts, Img);
1886  ContextImplPtr ContextImpl = getSyclObjImpl(Context);
1887  const detail::plugin &Plugin = ContextImpl->getPlugin();
1888 
1889  // TODO: Add support for creating non-SPIRV programs from multiple devices.
1890  if (InputImpl->get_bin_image_ref()->getFormat() !=
1892  Devs.size() > 1)
1893  sycl::runtime_error(
1894  "Creating a program from AOT binary for multiple device is not "
1895  "supported",
1896  PI_ERROR_INVALID_OPERATION);
1897 
1898  // Device is not used when creating program from SPIRV, so passing only one
1899  // device is OK.
1900  auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
1901  Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
1902 
1903  if (!DeviceCodeWasInCache &&
1904  InputImpl->get_bin_image_ref()->supportsSpecConstants()) {
1905  enableITTAnnotationsIfNeeded(NativePrg, Plugin);
1906 
1907  std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
1908  const std::map<std::string,
1909  std::vector<device_image_impl::SpecConstDescT>>
1910  &SpecConstData = InputImpl->get_spec_const_data_ref();
1911 
1912  for (const auto &DescPair : SpecConstData) {
1913  for (const device_image_impl::SpecConstDescT &SpecIDDesc :
1914  DescPair.second) {
1915  if (SpecIDDesc.IsSet) {
1917  NativePrg, SpecIDDesc.ID, SpecIDDesc.Size,
1918  SpecConsts.data() + SpecIDDesc.BlobOffset);
1919  }
1920  }
1921  }
1922  }
1923 
1924  ProgramPtr ProgramManaged(
1925  NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease);
1926 
1927  // Link a fallback implementation of device libraries if they are not
1928  // supported by a device compiler.
1929  // Pre-compiled programs are supposed to be already linked.
1930  // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
1931  // no fallback device library will be linked.
1932  uint32_t DeviceLibReqMask = 0;
1933  if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
1935  DeviceLibReqMask = getDeviceLibReqMask(Img);
1936 
1937  ProgramPtr BuiltProgram =
1938  build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
1939  getRawSyclObjImpl(Devs[0])->getHandleRef(), DeviceLibReqMask);
1940 
1941  emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl);
1942 
1943  {
1944  std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1945  NativePrograms[BuiltProgram.get()] = &Img;
1946  }
1947 
1948  // Save program to persistent cache if it is not there
1949  if (!DeviceCodeWasInCache)
1950  PersistentDeviceCodeCache::putItemToDisc(
1951  Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
1952 
1953  return BuiltProgram.release();
1954  };
1955 
1956  const RT::PiDevice PiDevice = getRawSyclObjImpl(Devs[0])->getHandleRef();
1957  // TODO: Throw SYCL2020 style exception
1958  auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
1959  Cache,
1960  std::make_pair(std::make_pair(std::move(SpecConsts), (size_t)ImgPtr),
1961  std::make_pair(PiDevice, CompileOpts + LinkOpts)),
1962  AcquireF, GetF, BuildF);
1963  // getOrBuild is not supposed to return nullptr
1964  assert(BuildResult != nullptr && "Invalid build result");
1965 
1966  RT::PiProgram ResProgram = BuildResult->Ptr.load();
1967 
1968  // Cache supports key with once device only, but here we have multiple
1969  // devices a program is built for, so add the program to the cache for all
1970  // other devices.
1971  const detail::plugin &Plugin = ContextImpl->getPlugin();
1972  auto CacheOtherDevices = [ResProgram, &Plugin]() {
1973  Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
1974  return ResProgram;
1975  };
1976 
1977  // The program for device "0" is already added to the cache during the first
1978  // call to getOrBuild, so starting with "1"
1979  for (size_t Idx = 1; Idx < Devs.size(); ++Idx) {
1980  const RT::PiDevice PiDeviceAdd =
1981  getRawSyclObjImpl(Devs[Idx])->getHandleRef();
1982 
1983  getOrBuild<PiProgramT, compile_program_error>(
1984  Cache,
1985  std::make_pair(std::make_pair(std::move(SpecConsts), (size_t)ImgPtr),
1986  std::make_pair(PiDeviceAdd, CompileOpts + LinkOpts)),
1987  AcquireF, GetF, CacheOtherDevices);
1988  // getOrBuild is not supposed to return nullptr
1989  assert(BuildResult != nullptr && "Invalid build result");
1990  }
1991 
1992  // devive_image_impl shares ownership of PIProgram with, at least, program
1993  // cache. The ref counter will be descremented in the destructor of
1994  // device_image_impl
1995  Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
1996 
1997  DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
1998  InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
1999  InputImpl->get_kernel_ids_ptr(), ResProgram,
2000  InputImpl->get_spec_const_data_ref(),
2001  InputImpl->get_spec_const_blob_ref());
2002 
2003  return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2004 }
2005 
2006 std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
2007  const context &Context, const std::string &KernelName,
2008  const property_list &PropList, RT::PiProgram Program) {
2009 
2010  (void)PropList;
2011 
2012  const ContextImplPtr Ctx = getSyclObjImpl(Context);
2013 
2014  using PiKernelT = KernelProgramCache::PiKernelT;
2015  using KernelCacheT = KernelProgramCache::KernelCacheT;
2016  using KernelByNameT = KernelProgramCache::KernelByNameT;
2017 
2018  KernelProgramCache &Cache = Ctx->getKernelProgramCache();
2019 
2020  auto AcquireF = [](KernelProgramCache &Cache) {
2021  return Cache.acquireKernelsPerProgramCache();
2022  };
2023  auto GetF =
2024  [&Program](const Locked<KernelCacheT> &LockedCache) -> KernelByNameT & {
2025  return LockedCache.get()[Program];
2026  };
2027  auto BuildF = [&Program, &KernelName, &Ctx] {
2028  PiKernelT *Result = nullptr;
2029 
2030  const detail::plugin &Plugin = Ctx->getPlugin();
2031  Plugin.call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
2032  &Result);
2033 
2035  sizeof(pi_bool), &PI_TRUE);
2036 
2037  return Result;
2038  };
2039 
2040  auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
2041  Cache, KernelName, AcquireF, GetF, BuildF);
2042  // getOrBuild is not supposed to return nullptr
2043  assert(BuildResult != nullptr && "Invalid build result");
2044  return std::make_pair(BuildResult->Ptr.load(),
2045  &(BuildResult->MBuildResultMutex));
2046 }
2047 
2048 } // namespace detail
2049 } // namespace sycl
2050 } // __SYCL_INLINE_NAMESPACE(cl)
2051 
2054 }
2055 
2056 // Executed as a part of current module's (.exe, .dll) static initialization
2058  (void)desc;
2059  // TODO implement the function
2060 }
cl::sycl::detail::getRawSyclObjImpl
detail::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
Definition: common.hpp:213
cl::sycl::detail::KernelProgramCache::BuildError::Code
pi_int32 Code
Definition: kernel_program_cache.hpp:37
cl::sycl::detail::pi::DeviceBinaryImage::getDeviceGlobals
const PropertyRange getDeviceGlobals() const
Definition: pi.hpp:379
cl::sycl::detail::pi::DeviceBinaryImage::getKernelParamOptInfo
const PropertyRange & getKernelParamOptInfo() const
Definition: pi.hpp:361
PI_DEVICE_BINARY_TYPE_NONE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
Definition: pi.h:682
piKernelCreate
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
Definition: pi_esimd_emulator.cpp:1343
cl::sycl::backend
backend
Definition: backend_types.hpp:21
cl::sycl::detail::KernelProgramCache::BuildResult
Denotes pointer to some entity with its general state and build error.
Definition: kernel_program_cache.hpp:48
cl::sycl::detail::Locked
Represents a reference to value with appropriate lock acquired.
Definition: locked.hpp:24
cl::sycl::detail::DbgProgMgr
static constexpr int DbgProgMgr
Definition: program_manager.cpp:46
cl::sycl::detail::applyLinkOptionsFromEnvironment
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
Definition: program_manager.cpp:444
_pi_offload_entry_struct
Definition: pi.h:639
cl::sycl::detail::pi::DeviceBinaryImage::getLinkOptions
const char * getLinkOptions() const
Definition: pi.hpp:313
cl::sycl::detail::pi::DeviceBinaryProperty::asByteArray
ByteArray asByteArray() const
Definition: pi.cpp:643
cl::sycl::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:56
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:31
cl::sycl::detail::isDeviceLibRequired
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:941
cl::sycl::exception::get_cl_code
cl_int get_cl_code() const
Definition: exception.cpp:114
pi_bool
pi_uint32 pi_bool
Definition: pi.h:96
context_impl.hpp
cl::sycl::detail::KernelProgramCache::saveKernel
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
Definition: kernel_program_cache.hpp:126
cl::sycl::detail::make_tuple
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:36
cl::sycl::detail::enableITTAnnotationsIfNeeded
static void enableITTAnnotationsIfNeeded(const RT::PiProgram &Prog, const plugin &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
Definition: program_manager.cpp:54
cl::sycl::detail::RTDeviceBinaryImage::supportsSpecConstants
bool supportsSpecConstants() const
Definition: device_binary_image.hpp:38
pi_device_binaries_struct::DeviceBinaries
pi_device_binary DeviceBinaries
Device binaries data.
Definition: pi.h:863
PI_DEVICE_BINARY_TYPE_SPIRV
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
Definition: pi.h:687
type_traits.hpp
PI_DEVICE_INFO_NAME
@ PI_DEVICE_INFO_NAME
Definition: pi.h:237
cl::sycl::detail::SpvFileKSId
constexpr KernelSetId SpvFileKSId
Definition: common.hpp:349
cl::sycl::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:20
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:714
pi_device_binary_struct::DeviceTargetSpec
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:778
config.hpp
piProgramLink
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
Definition: pi_opencl.cpp:794
cl::sycl::detail::getDeviceLibPrograms
static std::vector< RT::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, uint32_t DeviceLibReqMask)
Definition: program_manager.cpp:949
cl::sycl::detail::pi::DeviceBinaryImage::getFormat
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
Definition: pi.hpp:319
cl::sycl::detail::KernelProgramCache
Definition: kernel_program_cache.hpp:31
PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
Definition: pi.h:689
cl::sycl::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:667
stl.hpp
cl::sycl::detail::RTDeviceBinaryImage::print
void print() const override
Definition: device_binary_image.hpp:44
cl::sycl::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:107
device.hpp
cl::sycl::detail::waitUntilBuilt
RetT * waitUntilBuilt(KernelProgramCache &Cache, KernelProgramCache::BuildResult< RetT > *BuildResult)
Definition: program_manager.cpp:121
cl::sycl::detail::KernelProgramCache::KernelByNameT
std::map< std::string, KernelWithBuildStateT > KernelByNameT
Definition: kernel_program_cache.hpp:82
cl::sycl::detail::pi::getBinaryImageFormat
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
Definition: pi.cpp:705
cl::sycl::detail::getOrBuild
KernelProgramCache::BuildResult< RetT > * getOrBuild(KernelProgramCache &KPCache, KeyT &&CacheKey, AcquireFT &&Acquire, GetCacheFT &&GetCache, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
Definition: program_manager.cpp:163
piProgramRetain
pi_result piProgramRetain(pi_program program)
Definition: pi_esimd_emulator.cpp:1330
cl::sycl::detail::KernelProgramCache::BuildError
Denotes build error data.
Definition: kernel_program_cache.hpp:35
cl::sycl::detail::device_image_plain
Definition: kernel_bundle.hpp:71
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::end
ConstIterator end() const
Definition: pi.hpp:278
cl::sycl::detail::pi::DeviceBinaryImage::getCompileOptions
const char * getCompileOptions() const
Definition: pi.hpp:308
cl::sycl::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:102
cl::sycl::detail::KernelProgramCache::notifyAllBuild
void notifyAllBuild(BuildResult< T > &BR) const
Definition: kernel_program_cache.hpp:111
pi_device_binary_struct::BinaryStart
const unsigned char * BinaryStart
Pointer to the target code start.
Definition: pi.h:790
cl::sycl::detail::SYCLConfig
Definition: config.hpp:105
cl::sycl::detail::BuildState
BuildState
Definition: program_manager.cpp:48
_pi_result
_pi_result
Definition: pi.h:105
cl::sycl::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:60
context.hpp
cl::sycl::platform::get_info
info::param_traits< info::platform, param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.cpp:54
cl::sycl::errc::feature_not_supported
@ feature_not_supported
cl::sycl::detail::getFormatStr
static const char * getFormatStr(RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:291
cl::sycl::detail::program_impl
Definition: program_impl.hpp:37
piProgramCompile
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
piContextGetInfo
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:858
os_util.hpp
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::detail::LastKSId
constexpr KernelSetId LastKSId
Definition: common.hpp:350
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:855
cl::sycl::detail::KernelProgramCache::PiKernelT
std::remove_pointer< RT::PiKernel >::type PiKernelT
Definition: kernel_program_cache.hpp:78
pi_device_binary_struct::BinaryEnd
const unsigned char * BinaryEnd
Pointer to the target code end.
Definition: pi.h:792
cl::sycl::detail::pi::PiDeviceBinaryType
::pi_device_binary_type PiDeviceBinaryType
Definition: pi.hpp:105
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::plugin::getPiPlugin
const RT::PiPlugin & getPiPlugin() const
Definition: plugin.hpp:106
cl::sycl::detail::isDeviceBinaryTypeSupported
static bool isDeviceBinaryTypeSupported(const context &C, RT::PiDeviceBinaryType Format)
Definition: program_manager.cpp:247
cl::sycl::detail::RTDeviceBinaryImage::getRawData
const pi_device_binary_struct & getRawData() const
Definition: device_binary_image.hpp:42
PI_USM_INDIRECT_ACCESS
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
Definition: pi.h:1284
device_impl.hpp
cl::sycl::detail::BS_Failed
@ BS_Failed
Definition: program_manager.cpp:48
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_NUM_DEVICES
Definition: pi.h:301
util.hpp
cl::sycl::detail::applyOptionsFromEnvironment
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
Definition: program_manager.cpp:453
cl::sycl::detail::KernelProgramCache::KernelCacheT
std::map< RT::PiProgram, KernelByNameT > KernelCacheT
Definition: kernel_program_cache.hpp:83
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
Definition: pi.h:713
cl::sycl::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:346
PI_KERNEL_INFO_PROGRAM
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:323
cl::sycl::detail::KernelProgramCache::BuildResult::Ptr
std::atomic< T * > Ptr
Definition: kernel_program_cache.hpp:49
cl::sycl::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
cl::sycl::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:773
cl::sycl::context::get_platform
platform get_platform() const
Gets platform associated with this SYCL context.
Definition: context.cpp:124
cl::sycl::detail::applyOptionsFromImage
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:427
piProgramGetInfo
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1302
compile_program_error
cl::sycl::detail::pi::DeviceBinaryImage::getExportedSymbols
const PropertyRange getExportedSymbols() const
Definition: pi.hpp:372
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::begin
ConstIterator begin() const
Definition: pi.hpp:277
char
cl::sycl::detail::program_impl::get
cl_program get() const
Returns a valid cl_program instance.
Definition: program_impl.cpp:215
cl::sycl::detail::pi::DeviceBinaryProperty::asUint32
pi_uint32 asUint32() const
Definition: pi.cpp:636
cl::sycl::detail::pi::DeviceBinaryImage::getProgramMetadata
const PropertyRange & getProgramMetadata() const
Definition: pi.hpp:371
pi_uint32
uint32_t pi_uint32
Definition: pi.h:94
cl::sycl::detail::pi::DeviceBinaryImage::getAssertUsed
const PropertyRange getAssertUsed() const
Definition: pi.hpp:364
pi_device_binary_struct
This struct is a record of the device binary information.
Definition: pi.h:758
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2052
program_impl.hpp
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:36
cl::sycl::detail::program_impl::flush_spec_constants
void flush_spec_constants(const RTDeviceBinaryImage &Img, RT::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
Definition: program_impl.cpp:541
cl::sycl::detail::getUint32PropAsBool
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
Definition: program_manager.cpp:383
cl::sycl::detail::createSpirvProgram
static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
Definition: program_manager.cpp:96
cl::sycl::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:632
cl::sycl::detail::getDeviceLibExtensionStr
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
Definition: program_manager.cpp:780
pi_device_binaries_struct::NumDeviceBinaries
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
Definition: pi.h:861
piProgramCreateWithBinary
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
Definition: pi_esimd_emulator.cpp:1284
piKernelSetExecInfo
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
Definition: pi_esimd_emulator.cpp:1936
PI_DEVICE_BINARY_TYPE_NATIVE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
Definition: pi.h:684
cl::sycl::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:39
cl::sycl::detail::createKernelArgMask
static ProgramManager::KernelArgMask createKernelArgMask(const pi::ByteArray &Bytes)
Definition: program_manager.cpp:1087
cl::sycl::detail::pi::DeviceBinaryImage::getDeviceLibReqMask
const PropertyRange & getDeviceLibReqMask() const
Definition: pi.hpp:360
cl::sycl::detail::device_image_impl::SpecConstDescT::BlobOffset
unsigned int BlobOffset
Definition: device_image_impl.hpp:52
cl::sycl::detail::BS_Done
@ BS_Done
Definition: program_manager.cpp:48
cl::sycl::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
cl::sycl::detail::device_image_impl::SpecConstDescT::Size
unsigned int Size
Definition: device_image_impl.hpp:51
cl::sycl::detail::KernelProgramCache::BuildResult::MBuildResultMutex
std::mutex MBuildResultMutex
A mutex to be employed along with MBuildCV.
Definition: kernel_program_cache.hpp:65
Device
@ Device
Definition: usm_allocator.hpp:14
piDeviceGetInfo
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_esimd_emulator.cpp:591
cl::sycl::detail::pi::DeviceBinaryProperty
Definition: pi.hpp:227
cl::sycl::detail::ProgramManager::addImages
void addImages(pi_device_binaries DeviceImages)
Definition: program_manager.cpp:1120
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::detail::program_impl::get_build_options
std::string get_build_options() const
Returns the compile, link, or build options, from whichever of those operations was performed most re...
Definition: program_impl.hpp:310
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange
Definition: pi.hpp:249
global_handler.hpp
_pi_device_binary_property_struct
Definition: pi.h:661
cl::sycl::detail::ProgramManager::getInstance
static ProgramManager & getInstance()
Definition: program_manager.cpp:63
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:718
cl::sycl::detail::pi::DeviceBinaryImage::getProperty
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
Definition: pi.cpp:675
cl::sycl::detail::program_impl::stableSerializeSpecConstRegistry
void stableSerializeSpecConstRegistry(SerializedObj &Dst) const
Definition: program_impl.hpp:336
cl::sycl::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
cl::sycl::detail::pi::DeviceBinaryImage::getSize
size_t getSize() const
Definition: pi.hpp:303
program_manager.hpp
persistent_device_code_cache.hpp
cl::sycl::detail::pi::ByteArray::size
std::size_t size() const
Definition: pi.hpp:217
cl::sycl::context::get_devices
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
Definition: context.cpp:128
cl::sycl::detail::ProgramManager
Definition: program_manager.hpp:73
cl::sycl::detail::LessByHash
Definition: device_image_impl.hpp:35
cl::sycl::detail::device_image_impl::SpecConstDescT::ID
unsigned int ID
Definition: device_image_impl.hpp:49
cl::sycl::detail::UseSpvEnv
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2057
cl::sycl::detail::compatibleWithDevice
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
Definition: program_manager.cpp:1453
cl::sycl::detail::KernelProgramCache::BuildError::Msg
std::string Msg
Definition: kernel_program_cache.hpp:36
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(cl::sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
cl::sycl::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
device_image_impl.hpp
cl::sycl::detail::loadDeviceLibFallback
static RT::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const RT::PiDevice &Device)
Definition: program_manager.cpp:803
cl::sycl::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:76
cl::sycl::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:467
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
Definition: pi.h:271
piKernelGetInfo
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1360
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:204
backend_types.hpp
exception.hpp
cl::sycl::detail::KernelProgramCache::ProgramCacheT
std::map< ProgramCacheKeyT, ProgramWithBuildStateT > ProgramCacheT
Definition: kernel_program_cache.hpp:75
cl::sycl::detail::appendLinkOptionsFromImage
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:369
cl::sycl::detail::getBinImageState
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
Definition: program_manager.cpp:1437
cl::sycl::detail::KernelProgramCache::waitUntilBuilt
void waitUntilBuilt(BuildResult< T > &BR, Predicate Pred) const
Definition: kernel_program_cache.hpp:105
cl::sycl::detail::getDeviceLibFilename
static const char * getDeviceLibFilename(DeviceLibExt Extension)
Definition: program_manager.cpp:757
cl::sycl::detail::appendCompileOptionsFromImage
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:389
cl::sycl::detail::program_impl::hasSetSpecConstants
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
Definition: program_impl.hpp:341
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:38
cl::sycl::detail::KernelProgramCache::BuildResult::Error
BuildError Error
Definition: kernel_program_cache.hpp:51
piProgramCreate
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
Definition: pi_esimd_emulator.cpp:1280
cl::sycl::detail::KernelProgramCache::acquireKernelsPerProgramCache
Locked< KernelCacheT > acquireKernelsPerProgramCache()
Definition: kernel_program_cache.hpp:100
piProgramGetBuildInfo
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:1325
cl::sycl::detail::createBinaryProgram
static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
Definition: program_manager.cpp:68
cl::sycl::detail::pi::ByteArray
Definition: pi.hpp:211
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:55
any_of
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
cl::sycl::detail::BS_InProgress
@ BS_InProgress
Definition: program_manager.cpp:48
piextDeviceSelectBinary
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
Definition: pi_esimd_emulator.cpp:1946
common.hpp
cl::sycl::ext::oneapi::experimental::spec_const_error
Definition: spec_constant.hpp:30
cl::sycl::exception
Definition: exception.hpp:64
cl::sycl::detail::KernelProgramCache::acquireCachedPrograms
Locked< ProgramCacheT > acquireCachedPrograms()
Definition: kernel_program_cache.hpp:96
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
Definition: pi.h:712
_pi_plugin::PiFunctionTable
struct _pi_plugin::FunctionPointers PiFunctionTable
pi_device_binary_struct::EntriesEnd
_pi_offload_entry EntriesEnd
Definition: pi.h:795
cl::sycl::detail::loadDeviceLib
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, RT::PiProgram &Prog)
Definition: program_manager.cpp:736
cl::sycl::detail::pi::DeviceBinaryImage::get
pi_device_binary get() const
Definition: pi.hpp:390
cl::sycl::detail::device_image_impl::SpecConstDescT::IsSet
bool IsSet
Definition: device_image_impl.hpp:53
PI_PROGRAM_INFO_DEVICES
@ PI_PROGRAM_INFO_DEVICES
Definition: pi.h:290
spec_constant_impl.hpp
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::isAvailable
bool isAvailable() const
Definition: pi.hpp:280
piextProgramSetSpecializationConstant
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
Definition: pi_esimd_emulator.cpp:1941
cl::sycl::detail::program_impl::getHandleRef
RT::PiProgram & getHandleRef()
Definition: program_impl.hpp:133
cl::sycl::exception::what
const char * what() const noexcept final
Definition: exception.cpp:103
_pi_offload_entry_struct::name
char * name
Definition: pi.h:641
cl::sycl::detail::plugin::reportPiError
void reportPiError(RT::PiResult pi_result, const char *context) const
Definition: plugin.hpp:149
PI_TRUE
const pi_bool PI_TRUE
Definition: pi.h:458
cl::sycl::detail::emitBuiltProgramInfo
static void emitBuiltProgramInfo(const pi_program &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
Definition: program_manager.cpp:486
piProgramBuild
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
cl::sycl::detail::KernelProgramCache::PiProgramT
std::remove_pointer< RT::PiProgram >::type PiProgramT
Definition: kernel_program_cache.hpp:70
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
Definition: pi.h:711
cl::sycl::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:50
cl::sycl::detail::device_image_impl::SpecConstDescT
Definition: device_image_impl.hpp:48
cl::sycl::detail::applyCompileOptionsFromEnvironment
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
Definition: program_manager.cpp:434
PI_PROGRAM_BUILD_INFO_LOG
@ PI_PROGRAM_BUILD_INFO_LOG
Definition: pi.h:131
pi_int32
int32_t pi_int32
Definition: pi.h:93
spec_constant.hpp
cl::sycl::detail::KernelProgramCache::tryToGetKernelFast
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
Definition: kernel_program_cache.hpp:116
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:82
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12
cl::sycl::detail::DeviceImageImplPtr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
Definition: kernel_bundle.hpp:67
pi_device_binary_struct::EntriesBegin
_pi_offload_entry EntriesBegin
the offload entry table
Definition: pi.h:794
cl::sycl::detail::KernelProgramCache::BuildResult::State
std::atomic< int > State
Definition: kernel_program_cache.hpp:50