48 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
52 static constexpr
char UseSpvEnv[](
"SYCL_USE_KERNEL_SPV");
59 constexpr
char SpecValue = 1;
66 return GlobalHandler::instance().getProgramManager();
71 const unsigned char *Data,
size_t DataLen,
72 const std::vector<pi_device_binary_property> Metadata) {
78 sizeof(NumDevices), &NumDevices,
80 assert(NumDevices > 0 &&
81 "Only a single device is supported for AOT compilation");
88 Context->getHandleRef(), 1 , &
PiDevice, &DataLen, &Data,
89 Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
91 if (BinaryStatus != CL_SUCCESS) {
92 throw runtime_error(
"Creating program with binary failed.", BinaryStatus);
99 const unsigned char *Data,
108 RTDeviceBinaryImage &
111 bool JITCompilationIsRequired) {
113 std::cerr <<
">>> ProgramManager::getDeviceImage(" << M <<
", \""
119 return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
141 template <
typename RetT,
typename ExceptionT,
typename GetCachedBuildFT,
148 auto [BuildResult, InsertionTookPlace] = GetCachedBuild();
152 if (!InsertionTookPlace) {
161 BuildState Expected = BuildState::BS_Failed;
162 BuildState Desired = BuildState::BS_InProgress;
164 if (BuildResult->State.compare_exchange_strong(Expected, Desired))
171 RetT *Desired = Build();
174 RetT *Expected =
nullptr;
176 if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
178 assert(
false &&
"We've build an entity that is already have been built.");
180 BuildResult->Ptr.store(Desired);
186 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
187 BuildResult->State.store(BuildState::BS_Done);
194 BuildResult->Error.Msg = Ex.
what();
198 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
199 BuildResult->State.store(BuildState::BS_Failed);
204 std::rethrow_exception(std::current_exception());
207 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
208 BuildResult->State.store(BuildState::BS_Failed);
213 std::rethrow_exception(std::current_exception());
228 if (ContextBackend == backend::ext_oneapi_cuda)
234 for (
const device &D : Devices) {
235 if (!D.get_info<info::device::is_compiler_available>())
240 if (ContextBackend == backend::opencl) {
242 if (ver.find(
"OpenCL 1.0") == std::string::npos &&
243 ver.find(
"OpenCL 1.1") == std::string::npos &&
244 ver.find(
"OpenCL 1.2") == std::string::npos &&
245 ver.find(
"OpenCL 2.0") == std::string::npos)
249 for (
const device &D : Devices) {
252 std::vector<std::string> Extensions =
253 D.get_info<info::device::extensions>();
254 if (Extensions.end() ==
255 std::find(Extensions.begin(), Extensions.end(),
"cl_khr_il_program"))
273 assert(
false &&
"Unknown device image format");
281 std::cerr <<
">>> ProgramManager::createPIProgram(" << &Img <<
", "
288 throw runtime_error(
"Malformed device program image descriptor",
289 PI_ERROR_INVALID_VALUE);
292 throw runtime_error(
"Invalid device program image: size is zero",
293 PI_ERROR_INVALID_VALUE);
295 size_t ImgSize = Img.
getSize();
312 "SPIR-V online compilation is not supported in this context",
313 PI_ERROR_INVALID_OPERATION);
317 std::vector<pi_device_binary_property> ProgMetadataVector{
318 ProgMetadata.
begin(), ProgMetadata.end()};
325 ImgSize, ProgMetadataVector);
328 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
330 NativePrograms[Res] = &Img;
333 Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
348 if (TemporaryStr !=
nullptr) {
349 if (!LinkOpts.empty())
351 LinkOpts += std::string(TemporaryStr);
357 const char *PropName) {
364 const std::vector<device> &Devs,
369 static const char *CompileOptsEnv =
373 if (!CompileOptsEnv) {
374 if (!CompileOpts.empty())
377 if (TemporaryStr !=
nullptr)
378 CompileOpts += std::string(TemporaryStr);
387 if (!CompileOpts.empty())
389 CompileOpts +=
"-vc-codegen";
393 CompileOpts +=
" -disable-finalizer-msg";
396 if (!CompileOpts.empty())
401 CompileOpts += isEsimdImage ?
"-doubleGRF" :
"-ze-opt-large-register-file";
403 if ((Plugin.
getBackend() == backend::ext_oneapi_level_zero ||
406 [](
const device &Dev) { return Dev.is_gpu(); }) &&
410 if (!CompileOpts.empty())
412 CompileOpts +=
"-ze-take-global-address";
417 std::string &LinkOpts,
419 const std::vector<device> &Devices,
428 static const char *CompileOptsEnv =
430 if (CompileOptsEnv) {
431 CompileOpts = CompileOptsEnv;
440 LinkOpts = LinkOptsEnv;
445 std::string &LinkOpts) {
451 std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
453 const device &Device,
const std::string &CompileAndLinkOptions,
457 auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
458 Device, Img, SpecConsts, CompileAndLinkOptions);
459 if (BinProg.size()) {
462 std::vector<pi_device_binary_property> ProgMetadataVector{
463 ProgMetadata.
begin(), ProgMetadata.end()};
467 (
const unsigned char *)BinProg[0].data(),
468 BinProg[0].size(), ProgMetadataVector);
470 NativePrg = createPIProgram(Img, Context, Device);
472 return {NativePrg, BinProg.size()};
480 std::string ProgramBuildLog =
481 ProgramManager::getProgramBuildLog(Prog, Context);
482 std::clog << ProgramBuildLog << std::endl;
488 const DeviceImplPtr &DeviceImpl,
const std::string &KernelName,
489 const program_impl *Prg,
bool JITCompilationIsRequired) {
498 std::string CompileOpts;
499 std::string LinkOpts;
513 while (!RootDevImpl->isRootDevice()) {
515 RootDevImpl->get_info<info::device::parent_device>());
517 if (!ContextImpl->hasDevice(ParentDev))
519 RootDevImpl = ParentDev;
525 sizeof(
pi_bool), &MustBuildOnSubdevice,
nullptr);
528 (MustBuildOnSubdevice ==
PI_TRUE) ? DeviceImpl : RootDevImpl;
529 auto Context = createSyclObjFromImpl<context>(ContextImpl);
530 auto Device = createSyclObjFromImpl<device>(Dev);
532 getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
538 #define __SYCL_ASPECT(ASPECT, ID) \
539 case aspect::ASPECT: \
541 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
544 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
545 auto getAspectNameStr = [](aspect AspectNum) -> std::string {
547 #include <sycl/info/aspects.def>
548 #include <sycl/info/aspects_deprecated.def>
550 throw sycl::exception(errc::kernel_not_supported,
552 std::to_string(
static_cast<unsigned>(AspectNum)));
554 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
555 #undef __SYCL_ASPECT_DEPRECATED
559 using namespace std::literals;
560 if ((*It)->Name !=
"aspects"sv)
564 auto *AIt =
reinterpret_cast<const std::uint32_t *
>(&Aspects[8]);
566 reinterpret_cast<const std::uint32_t *
>(&Aspects[0] + Aspects.
size());
567 while (AIt != AEnd) {
568 auto Aspect =
static_cast<aspect
>(*AIt);
570 if (!Dev->has(Aspect))
571 throw sycl::exception(errc::kernel_not_supported,
572 "Required aspect " + getAspectNameStr(Aspect) +
573 " is not supported on the device");
578 auto BuildF = [
this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
579 &LinkOpts, SpecConsts] {
583 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
584 Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
586 if (!DeviceCodeWasInCache) {
588 flushSpecConstants(*Prg, NativePrg, &Img);
593 ProgramPtr ProgramManaged(
594 NativePrg, Plugin.
getPiPlugin().PiFunctionTable.piProgramRelease);
602 uint32_t DeviceLibReqMask = 0;
603 if (!DeviceCodeWasInCache &&
606 DeviceLibReqMask = getDeviceLibReqMask(Img);
608 ProgramPtr BuiltProgram =
609 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
615 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
616 NativePrograms[BuiltProgram.get()] = &Img;
619 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
622 if (!DeviceCodeWasInCache)
623 PersistentDeviceCodeCache::putItemToDisc(
624 Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.
get());
625 return BuiltProgram.release();
631 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
632 std::make_pair(
PiDevice, CompileOpts + LinkOpts));
634 auto GetCachedBuildF = [&Cache, &CacheKey]() {
638 auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
639 Cache, GetCachedBuildF, BuildF);
641 assert(BuildResult !=
nullptr &&
"Invalid build result");
642 return BuildResult->Ptr.load();
645 std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
649 const std::string &KernelName,
652 std::cerr <<
">>> ProgramManager::getOrCreateKernel(" << M <<
", "
653 << ContextImpl.get() <<
", " << DeviceImpl.get() <<
", "
654 << KernelName <<
")\n";
661 std::string CompileOpts, LinkOpts;
671 CompileOpts + LinkOpts, KernelName);
673 if (std::get<0>(ret_tuple))
677 getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
679 auto BuildF = [&Program, &KernelName, &ContextImpl] {
680 PiKernelT *Result =
nullptr;
684 Program, KernelName.c_str(), &Result);
694 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
698 auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
699 Cache, GetCachedBuildF, BuildF);
701 assert(BuildResult !=
nullptr &&
"Invalid build result");
703 &(BuildResult->MBuildResultMutex), Program);
718 std::string ProgramManager::getProgramBuildLog(
const RT::PiProgram &Program,
720 size_t PIDevicesSize = 0;
723 nullptr, &PIDevicesSize);
724 std::vector<RT::PiDevice> PIDevices(PIDevicesSize /
sizeof(
RT::PiDevice));
726 PIDevicesSize, PIDevices.data(),
728 std::string Log =
"The program was built for " +
729 std::to_string(PIDevices.size()) +
" devices";
731 std::string DeviceBuildInfoString;
732 size_t DeviceBuildInfoStrSize = 0;
735 &DeviceBuildInfoStrSize);
736 if (DeviceBuildInfoStrSize > 0) {
737 std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
740 DeviceBuildInfo.data(),
nullptr);
741 DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
744 std::string DeviceNameString;
745 size_t DeviceNameStrSize = 0;
747 nullptr, &DeviceNameStrSize);
748 if (DeviceNameStrSize > 0) {
749 std::vector<char> DeviceName(DeviceNameStrSize);
752 DeviceName.data(),
nullptr);
753 DeviceNameString = std::string(DeviceName.data());
755 Log +=
"\nBuild program log for '" + DeviceNameString +
"':\n" +
756 DeviceBuildInfoString;
766 std::string LibSyclDir = OSUtil::getCurrentDSODir();
767 std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
768 std::ifstream::in | std::ifstream::binary);
773 File.seekg(0, std::ios::end);
774 size_t FileSize = File.tellg();
775 File.seekg(0, std::ios::beg);
776 std::vector<char> FileContent(FileSize);
777 File.read(&FileContent[0], FileSize);
782 return Prog !=
nullptr;
787 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
789 {DeviceLibExt::cl_intel_devicelib_assert,
790 {
nullptr,
"libsycl-fallback-cassert.spv"}},
791 {DeviceLibExt::cl_intel_devicelib_math,
792 {
nullptr,
"libsycl-fallback-cmath.spv"}},
793 {DeviceLibExt::cl_intel_devicelib_math_fp64,
794 {
nullptr,
"libsycl-fallback-cmath-fp64.spv"}},
795 {DeviceLibExt::cl_intel_devicelib_complex,
796 {
nullptr,
"libsycl-fallback-complex.spv"}},
797 {DeviceLibExt::cl_intel_devicelib_complex_fp64,
798 {
nullptr,
"libsycl-fallback-complex-fp64.spv"}},
799 {DeviceLibExt::cl_intel_devicelib_cstring,
800 {
nullptr,
"libsycl-fallback-cstring.spv"}},
801 {DeviceLibExt::cl_intel_devicelib_imf,
802 {
nullptr,
"libsycl-fallback-imf.spv"}},
803 {DeviceLibExt::cl_intel_devicelib_imf_fp64,
804 {
nullptr,
"libsycl-fallback-imf-fp64.spv"}},
805 {DeviceLibExt::cl_intel_devicelib_imf_bf16,
806 {
nullptr,
"libsycl-fallback-imf-bf16.spv"}},
807 {DeviceLibExt::cl_intel_devicelib_bfloat16,
808 {
"libsycl-native-bfloat16.spv",
"libsycl-fallback-bfloat16.spv"}}};
812 const char *Lib =
nullptr;
814 Lib = Native ? LibPair->second.first : LibPair->second.second;
817 PI_ERROR_INVALID_OPERATION);
825 {DeviceLibExt::cl_intel_devicelib_assert,
"cl_intel_devicelib_assert"},
826 {DeviceLibExt::cl_intel_devicelib_math,
"cl_intel_devicelib_math"},
827 {DeviceLibExt::cl_intel_devicelib_math_fp64,
828 "cl_intel_devicelib_math_fp64"},
829 {DeviceLibExt::cl_intel_devicelib_complex,
"cl_intel_devicelib_complex"},
830 {DeviceLibExt::cl_intel_devicelib_complex_fp64,
831 "cl_intel_devicelib_complex_fp64"},
832 {DeviceLibExt::cl_intel_devicelib_cstring,
"cl_intel_devicelib_cstring"},
833 {DeviceLibExt::cl_intel_devicelib_imf,
"cl_intel_devicelib_imf"},
834 {DeviceLibExt::cl_intel_devicelib_imf_fp64,
"cl_intel_devicelib_imf_fp64"},
835 {DeviceLibExt::cl_intel_devicelib_imf_bf16,
"cl_intel_devicelib_imf_bf16"},
836 {DeviceLibExt::cl_intel_devicelib_bfloat16,
837 "cl_intel_bfloat16_conversions"}};
843 PI_ERROR_INVALID_OPERATION);
854 auto LockedCache = Context->acquireCachedLibPrograms();
855 auto CachedLibPrograms = LockedCache.get();
856 auto CacheResult = CachedLibPrograms.emplace(
857 std::make_pair(std::make_pair(Extension, Device),
nullptr));
858 bool Cached = !CacheResult.second;
859 auto LibProgIt = CacheResult.first;
866 CachedLibPrograms.erase(LibProgIt);
868 PI_ERROR_INVALID_VALUE);
880 "", 0,
nullptr,
nullptr,
nullptr,
nullptr);
881 if (Error != PI_SUCCESS) {
882 CachedLibPrograms.erase(LibProgIt);
884 ProgramManager::getProgramBuildLog(LibProg, Context), Error);
890 ProgramManager::ProgramManager() {
891 const char *SpvFile = std::getenv(
UseSpvEnv);
898 std::ifstream File(SpvFile, std::ios::binary);
901 throw runtime_error(std::string(
"Can't open file specified via ") +
903 PI_ERROR_INVALID_VALUE);
904 File.seekg(0, std::ios::end);
905 size_t Size = File.tellg();
906 std::unique_ptr<char[]> Data(
new char[Size]);
908 File.read(Data.get(), Size);
911 throw runtime_error(std::string(
"read from ") + SpvFile +
912 std::string(
" failed"),
913 PI_ERROR_INVALID_VALUE);
914 auto ImgPtr = make_unique_ptr<DynRTDeviceBinaryImage>(
915 std::move(Data), Size, OSUtil::DummyModuleHandle);
918 std::cerr <<
"loaded device image binary from " << SpvFile <<
"\n";
925 new std::vector<RTDeviceBinaryImageUPtr>());
926 m_DeviceImages[
SpvFileKSId]->push_back(std::move(ImgPtr));
933 bool JITCompilationIsRequired) {
935 std::cerr <<
">>> ProgramManager::getDeviceImage(" << M <<
", \"" << KSId
940 std::cerr <<
"available device images:\n";
941 debugPrintBinaryImages();
943 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
944 auto It = m_DeviceImages.find(KSId);
945 assert(It != m_DeviceImages.end() &&
946 "No device image found for the given kernel set id");
947 std::vector<RTDeviceBinaryImageUPtr> &Imgs = *It->second;
958 std::vector<pi_device_binary> RawImgs(Imgs.size());
959 for (
unsigned I = 0; I < Imgs.size(); I++)
966 if (JITCompilationIsRequired) {
976 PI_ERROR_INVALID_OPERATION);
980 Img = Imgs[ImgInd].
get();
991 0x1 << (
static_cast<uint32_t
>(Ext) -
992 static_cast<uint32_t
>(DeviceLibExt::cl_intel_devicelib_assert));
993 return ((DeviceLibReqMask & Mask) == Mask);
996 static std::vector<RT::PiProgram>
998 uint32_t DeviceLibReqMask) {
999 std::vector<RT::PiProgram> Programs;
1001 std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1002 {DeviceLibExt::cl_intel_devicelib_assert,
1004 {DeviceLibExt::cl_intel_devicelib_math,
false},
1005 {DeviceLibExt::cl_intel_devicelib_math_fp64,
false},
1006 {DeviceLibExt::cl_intel_devicelib_complex,
false},
1007 {DeviceLibExt::cl_intel_devicelib_complex_fp64,
false},
1008 {DeviceLibExt::cl_intel_devicelib_cstring,
false},
1009 {DeviceLibExt::cl_intel_devicelib_imf,
false},
1010 {DeviceLibExt::cl_intel_devicelib_imf_fp64,
false},
1011 {DeviceLibExt::cl_intel_devicelib_imf_bf16,
false},
1012 {DeviceLibExt::cl_intel_devicelib_bfloat16,
false}};
1018 Context->getPlugin());
1019 const bool fp64Support = (DevExtList.npos != DevExtList.find(
"cl_khr_fp64"));
1023 for (
auto &Pair : RequiredDeviceLibExt) {
1025 bool &FallbackIsLoaded = Pair.second;
1027 if (FallbackIsLoaded) {
1034 if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 ||
1035 Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) &&
1042 bool InhibitNativeImpl =
false;
1043 if (
const char *Env = getenv(
"SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1044 InhibitNativeImpl = strstr(Env, ExtName) !=
nullptr;
1047 bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1048 if (!DeviceSupports || InhibitNativeImpl) {
1051 FallbackIsLoaded =
true;
1054 if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) {
1057 FallbackIsLoaded =
true;
1064 ProgramManager::ProgramPtr
1066 const std::string &CompileOptions,
1067 const std::string &LinkOptions,
1068 const RT::PiDevice &Device, uint32_t DeviceLibReqMask) {
1071 std::cerr <<
">>> ProgramManager::build(" << Program.get() <<
", "
1072 << CompileOptions <<
", " << LinkOptions <<
", ... " << Device
1085 DeviceLibReqMask &= 0xFFFFFFFE;
1086 bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1091 if (CompileOptions.find(std::string(
"-cmc")) != std::string::npos ||
1092 CompileOptions.find(std::string(
"-vc-codegen")) != std::string::npos)
1093 LinkDeviceLibs =
false;
1095 std::vector<RT::PiProgram> LinkPrograms;
1096 if (LinkDeviceLibs) {
1100 static const char *ForceLinkEnv = std::getenv(
"SYCL_FORCE_LINK");
1101 static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv ==
'1');
1103 const detail::plugin &Plugin = Context->getPlugin();
1104 if (LinkPrograms.empty() && !ForceLink) {
1105 const std::string &Options = LinkOptions.empty()
1107 : (CompileOptions +
" " + LinkOptions);
1109 Program.get(), 1, &Device, Options.c_str(),
nullptr,
1111 if (Error != PI_SUCCESS)
1119 &
Device, CompileOptions.c_str(), 0,
1120 nullptr,
nullptr,
nullptr,
nullptr);
1121 LinkPrograms.push_back(Program.get());
1125 Context->getHandleRef(), 1, &
Device, LinkOptions.c_str(),
1126 LinkPrograms.size(), LinkPrograms.data(),
nullptr,
nullptr, &LinkedProg);
1130 Program.reset(LinkedProg);
1131 if (Error != PI_SUCCESS) {
1138 Plugin.checkPiResult(Error);
1143 static ProgramManager::KernelArgMask
1145 const int NBytesForSize = 8;
1146 const int NBitsInElement = 8;
1147 std::uint64_t SizeInBits = 0;
1148 for (
int I = 0; I < NBytesForSize; ++I)
1149 SizeInBits |=
static_cast<std::uint64_t
>(Bytes[I]) << I * NBitsInElement;
1152 for (std::uint64_t I = 0; I < SizeInBits; ++I) {
1153 std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
1154 Result.push_back(Byte & (1 << (I % NBitsInElement)));
1161 RTDeviceBinaryImage &Img) {
1162 const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1163 Img.getAssertUsed();
1164 if (AssertUsedRange.isAvailable())
1165 for (
const auto &Prop : AssertUsedRange) {
1166 KernelNameWithOSModule Key{Prop->Name, M};
1167 m_KernelUsesAssert.insert(Key);
1172 const std::string &KernelName)
const {
1173 KernelNameWithOSModule Key{KernelName, M};
1174 return m_KernelUsesAssert.find(Key) != m_KernelUsesAssert.
end();
1178 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1179 const bool DumpImages = std::getenv(
"SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1186 auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
1187 static uint32_t SequenceID = 0;
1191 Img->getKernelParamOptInfo();
1193 KernelNameToArgMaskMap &ArgMaskMap =
1194 m_EliminatedKernelArgMasks[Img.get()];
1195 for (
const auto &Info : KPOIRange)
1196 ArgMaskMap[Info->Name] =
1201 if (EntriesB != EntriesE) {
1202 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1205 auto ExportedSymbols = Img->getExportedSymbols();
1207 m_ExportedSymbols.insert(ExportedSymbol->Name);
1209 m_BinImg2KernelIDs[Img.get()].reset(
new std::vector<kernel_id>);
1218 if (std::strstr(EntriesIt->name,
"__sycl_service_kernel__")) {
1219 m_ServiceKernels.insert(EntriesIt->name);
1226 if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1230 auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1231 if (It == m_KernelName2KernelIDs.end()) {
1232 std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1233 std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1234 sycl::kernel_id KernelID =
1235 detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1237 It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name,
1241 m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1242 m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1246 std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1253 if (EntriesB != EntriesE) {
1256 StrToKSIdMap &KSIdMap = m_KernelSets[M];
1257 auto KSIdIt = KSIdMap.find(EntriesB->
name);
1258 if (KSIdIt != KSIdMap.end()) {
1259 auto &Imgs = m_DeviceImages[KSIdIt->second];
1260 assert(Imgs &&
"Device image vector should have been already created");
1262 const bool NeedsSequenceID =
1263 std::any_of(Imgs->begin(), Imgs->end(), [&](
auto &I) {
1264 return I->getFormat() == Img->getFormat();
1266 dumpImage(*Img, KSIdIt->second, NeedsSequenceID ? ++SequenceID : 0);
1269 cacheKernelUsesAssertInfo(M, *Img);
1271 Imgs->push_back(std::move(Img));
1277 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1281 KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
1286 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1288 auto DeviceGlobals = Img->getDeviceGlobals();
1299 auto [TypeSize, DeviceImageScopeDecorated] =
1300 DeviceGlobalInfo.
consume<std::uint32_t, std::uint32_t>();
1301 assert(DeviceGlobalInfo.
empty() &&
"Extra data left!");
1305 uintptr_t ImgId =
reinterpret_cast<uintptr_t
>(Img.get());
1307 auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1308 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1310 ExistingDeviceGlobal->second->initialize(ImgId, KSId, TypeSize,
1311 DeviceImageScopeDecorated);
1316 auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1317 DeviceGlobal->Name, ImgId, KSId, TypeSize,
1318 DeviceImageScopeDecorated);
1319 m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1323 m_DeviceImages[KSId].reset(
new std::vector<RTDeviceBinaryImageUPtr>());
1324 cacheKernelUsesAssertInfo(M, *Img);
1327 dumpImage(*Img, KSId);
1328 m_DeviceImages[KSId]->push_back(std::move(Img));
1336 KSId = getNextKernelSetId();
1338 auto &Imgs = m_DeviceImages[KSId];
1340 Imgs.reset(
new std::vector<RTDeviceBinaryImageUPtr>());
1342 cacheKernelUsesAssertInfo(M, *Img);
1345 dumpImage(*Img, KSId);
1346 Imgs->push_back(std::move(Img));
1350 void ProgramManager::debugPrintBinaryImages()
const {
1351 for (
const auto &ImgVecIt : m_DeviceImages) {
1352 std::cerr <<
" ++++++ Kernel set: " << ImgVecIt.first <<
"\n";
1353 for (
const auto &Img : *ImgVecIt.second)
1358 KernelSetId ProgramManager::getNextKernelSetId()
const {
1366 const std::string &KernelName)
const {
1369 if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1371 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1372 auto KSIdMapIt = m_KernelSets.find(M);
1373 if (KSIdMapIt != m_KernelSets.end()) {
1374 const StrToKSIdMap &KSIdMap = KSIdMapIt->second;
1375 auto KSIdIt = KSIdMap.find(KernelName);
1377 if (KSIdIt != KSIdMap.end())
1378 return KSIdIt->second;
1382 auto ModuleKSIdIt = m_OSModuleKernelSets.find(M);
1383 if (ModuleKSIdIt != m_OSModuleKernelSets.end())
1384 return ModuleKSIdIt->second;
1386 throw runtime_error(
"No kernel named " + KernelName +
" was found",
1387 PI_ERROR_INVALID_KERNEL_NAME);
1390 void ProgramManager::dumpImage(
const RTDeviceBinaryImage &Img,
KernelSetId KSId,
1391 uint32_t SequenceID)
const {
1392 std::string Fname(
"sycl_");
1395 Fname += std::to_string(KSId);
1397 Fname +=
'_' + std::to_string(SequenceID);
1409 std::ofstream F(Fname, std::ios::binary);
1412 throw runtime_error(
"Can not write " + Fname, PI_ERROR_UNKNOWN);
1422 std::cerr <<
">>> ProgramManager::flushSpecConstants(" << Prg.
get()
1429 assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1430 NativePrg = NativePrg ? NativePrg : PrgHandle;
1435 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1436 auto It = NativePrograms.find(NativePrg);
1437 if (It == NativePrograms.end())
1438 throw sycl::ext::oneapi::experimental::spec_const_error(
1439 "spec constant is set in a program w/o a binary image",
1440 PI_ERROR_INVALID_OPERATION);
1445 std::cerr <<
">>> ProgramManager::flushSpecConstants: binary image "
1446 << &Img->
getRawData() <<
" doesn't support spec constants\n";
1474 if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1478 if (m_EliminatedKernelArgMasks.empty())
1482 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1483 auto ImgIt = NativePrograms.find(NativePrg);
1484 if (ImgIt != NativePrograms.end()) {
1485 auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1486 if (MapIt != m_EliminatedKernelArgMasks.end())
1487 return MapIt->second[KernelName];
1494 for (
auto &Elem : m_EliminatedKernelArgMasks) {
1495 auto ArgMask = Elem.second.find(KernelName);
1496 if (ArgMask != Elem.second.end())
1497 return ArgMask->second;
1505 auto IsAOTBinary = [](
const char *Format) {
1517 return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1522 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1524 auto &Plugin = DeviceImpl->getPlugin();
1526 const RT::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1535 PIDeviceHandle, &DevBin,
1537 if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1538 throw runtime_error(
"Invalid binary image or device",
1539 PI_ERROR_INVALID_VALUE);
1541 return (0 == SuitableImageID);
1544 kernel_id ProgramManager::getSYCLKernelID(
const std::string &KernelName) {
1545 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1547 auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1548 if (KernelID == m_KernelName2KernelIDs.end())
1549 throw runtime_error(
"No kernel found with the specified name",
1550 PI_ERROR_INVALID_KERNEL_NAME);
1552 return KernelID->second;
1555 bool ProgramManager::hasCompatibleImage(
const device &Dev) {
1556 std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1559 m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1561 std::shared_ptr<std::vector<kernel_id>>>
1562 Elem) { return compatibleWithDevice(Elem.first, Dev); });
1565 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1566 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1568 std::vector<sycl::kernel_id> AllKernelIDs;
1569 AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1570 for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1571 AllKernelIDs.push_back(KernelID.second);
1573 return AllKernelIDs;
1576 kernel_id ProgramManager::getBuiltInKernelID(
const std::string &KernelName) {
1577 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1579 auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1580 if (KernelID == m_BuiltInKernelIDs.end()) {
1581 auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1582 auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1583 KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1586 return KernelID->second;
1589 void ProgramManager::addOrInitDeviceGlobalEntry(
const void *DeviceGlobalPtr,
1590 const char *UniqueId) {
1591 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1593 auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1594 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1596 ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1597 m_Ptr2DeviceGlobal.insert(
1598 {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1603 std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1604 auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1605 m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1608 std::set<RTDeviceBinaryImage *>
1609 ProgramManager::getRawDeviceImages(
const std::vector<kernel_id> &KernelIDs) {
1610 std::set<RTDeviceBinaryImage *> BinImages;
1611 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1612 for (
const kernel_id &KID : KernelIDs) {
1613 auto Range = m_KernelIDs2BinImage.equal_range(KID);
1614 for (
auto It = Range.first, End = Range.second; It != End; ++It)
1615 BinImages.insert(It->second);
1621 ProgramManager::getDeviceGlobalEntry(
const void *DeviceGlobalPtr) {
1622 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1623 auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1624 assert(Entry != m_Ptr2DeviceGlobal.end() &&
"Device global entry not found");
1625 return Entry->second;
1628 std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
1629 const std::vector<std::string> &UniqueIds,
1630 bool ExcludeDeviceImageScopeDecorated) {
1631 std::vector<DeviceGlobalMapEntry *> FoundEntries;
1632 FoundEntries.reserve(UniqueIds.size());
1634 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1635 for (
const std::string &UniqueId : UniqueIds) {
1636 auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1637 assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1638 "Device global not found in map.");
1639 if (!ExcludeDeviceImageScopeDecorated ||
1640 !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1641 FoundEntries.push_back(DeviceGlobalEntry->second.get());
1643 return FoundEntries;
1652 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1655 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1656 KernelIDs = m_BinImg2KernelIDs[BinImage];
1660 BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1663 return createSyclObjFromImpl<device_image_plain>(Impl);
1666 std::vector<device_image_plain>
1667 ProgramManager::getSYCLDeviceImagesWithCompatibleState(
1668 const context &Ctx,
const std::vector<device> &Devs,
1669 bundle_state TargetState,
const std::vector<kernel_id> &KernelIDs) {
1673 std::set<RTDeviceBinaryImage *> BinImages;
1674 if (!KernelIDs.empty()) {
1675 for (
const auto &KID : KernelIDs) {
1676 bool isCompatibleWithAtLeastOneDev =
1677 std::any_of(Devs.begin(), Devs.end(), [&KID](
const auto &Dev) {
1678 return sycl::is_compatible({KID}, Dev);
1680 if (!isCompatibleWithAtLeastOneDev)
1681 throw sycl::exception(
1683 "Kernel is incompatible with all devices in devs");
1685 BinImages = getRawDeviceImages(KernelIDs);
1687 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1688 for (
auto &ImagesSets : m_DeviceImages) {
1689 auto &ImagesUPtrs = *ImagesSets.second.get();
1690 for (
auto &ImageUPtr : ImagesUPtrs)
1691 BinImages.insert(ImageUPtr.get());
1694 assert(BinImages.size() > 0 &&
"Expected to find at least one device image");
1705 for (
auto It = BinImages.begin(); It != BinImages.end();) {
1707 It = BinImages.erase(It);
1712 std::vector<device_image_plain> SYCLDeviceImages;
1720 struct DeviceBinaryImageInfo {
1721 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1723 int RequirementCounter = 0;
1725 std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1727 for (
const sycl::device &Dev : Devs) {
1729 using StateImagesPairT =
1730 std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1731 using KernelImageMapT =
1732 std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1733 KernelImageMapT KernelImageMap;
1734 if (!KernelIDs.empty())
1735 for (
const kernel_id &KernelID : KernelIDs)
1736 KernelImageMap.insert({KernelID, {}});
1738 for (RTDeviceBinaryImage *BinImage : BinImages) {
1743 auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1744 DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1745 if (InsertRes.second) {
1749 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1750 ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1754 const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1756 int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1759 if (!ImageKernelIDs || ImageKernelIDs->empty())
1763 for (kernel_id &KernelID : *ImageKernelIDs) {
1764 StateImagesPairT *StateImagesPair;
1766 if (!KernelIDs.empty()) {
1767 auto It = KernelImageMap.find(KernelID);
1768 if (It == KernelImageMap.end())
1770 StateImagesPair = &It->second;
1772 StateImagesPair = &KernelImageMap[KernelID];
1774 auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1776 if (KernelImages.empty()) {
1777 KernelImagesState = ImgState;
1778 KernelImages.push_back(BinImage);
1779 ++ImgRequirementCounter;
1780 }
else if (KernelImagesState < ImgState) {
1781 for (RTDeviceBinaryImage *Img : KernelImages) {
1782 auto It = ImageInfoMap.find(Img);
1783 assert(It != ImageInfoMap.end());
1784 assert(It->second.RequirementCounter > 0);
1785 --(It->second.RequirementCounter);
1787 KernelImages.clear();
1788 KernelImages.push_back(BinImage);
1789 KernelImagesState = ImgState;
1790 ++ImgRequirementCounter;
1791 }
else if (KernelImagesState == ImgState) {
1792 KernelImages.push_back(BinImage);
1793 ++ImgRequirementCounter;
1799 for (
const auto &ImgInfoPair : ImageInfoMap) {
1800 if (ImgInfoPair.second.RequirementCounter == 0)
1804 ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1805 ImgInfoPair.second.KernelIDs,
nullptr);
1807 SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1810 return SYCLDeviceImages;
1813 void ProgramManager::bringSYCLDeviceImagesToState(
1814 std::vector<device_image_plain> &DeviceImages,
bundle_state TargetState) {
1819 switch (TargetState) {
1820 case bundle_state::input:
1822 assert(DevImageState == bundle_state::input);
1824 case bundle_state::object:
1825 if (DevImageState == bundle_state::input) {
1831 assert(DevImageState == bundle_state::object);
1833 case bundle_state::executable: {
1834 switch (DevImageState) {
1835 case bundle_state::input:
1839 case bundle_state::object: {
1840 std::vector<device_image_plain> LinkedDevImages =
1845 assert(LinkedDevImages.size() == 1 &&
"Expected one linked image here");
1846 DevImage = LinkedDevImages[0];
1849 case bundle_state::executable:
1860 std::vector<device_image_plain>
1861 ProgramManager::getSYCLDeviceImages(
const context &Ctx,
1862 const std::vector<device> &Devs,
1865 std::vector<device_image_plain> DeviceImages =
1866 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1868 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1869 return DeviceImages;
1872 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1873 const context &Ctx,
const std::vector<device> &Devs,
1876 std::vector<device_image_plain> DeviceImages =
1877 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1880 auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1882 return !Selector(getSyclObjImpl(Image));
1884 DeviceImages.erase(It, DeviceImages.end());
1888 return DeviceImages;
1891 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1892 const context &Ctx,
const std::vector<device> &Devs,
1893 const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState) {
1895 if (KernelIDs.empty())
1899 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1901 for (
auto &It : m_BuiltInKernelIDs) {
1902 if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1905 "Attempting to use a built-in kernel. They are "
1906 "not fully supported");
1911 std::vector<device_image_plain> DeviceImages =
1912 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1915 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1916 return DeviceImages;
1925 std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
1926 const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
1927 &SpecConstData = InputImpl->get_spec_const_data_ref();
1928 const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
1931 for (
const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
1932 std::ignore = SpecConstNames;
1934 if (SpecIDDesc.IsSet) {
1936 Prog, SpecIDDesc.ID, SpecIDDesc.Size,
1937 SpecConsts.data() + SpecIDDesc.BlobOffset);
1945 const std::vector<device> &Devs,
1952 const std::shared_ptr<device_image_impl> &InputImpl =
1959 if (InputImpl->get_bin_image_ref()->getFormat() !=
1962 sycl::runtime_error(
1963 "Creating a program from AOT binary for multiple device is not "
1965 PI_ERROR_INVALID_OPERATION);
1969 RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
1970 InputImpl->get_context(), Devs[0]);
1972 if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1976 InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
1977 bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
1978 InputImpl->get_spec_const_data_ref(),
1979 InputImpl->get_spec_const_blob_ref());
1981 std::vector<pi_device> PIDevices;
1982 PIDevices.reserve(Devs.size());
1983 for (
const device &Dev : Devs)
1987 std::string CompileOptions;
1990 CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
1992 ObjectImpl->get_program_ref(), Devs.size(),
1993 PIDevices.data(), CompileOptions.c_str(),
1997 if (Error != PI_SUCCESS)
1998 throw sycl::exception(
2000 getProgramBuildLog(ObjectImpl->get_program_ref(),
2003 return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2006 std::vector<device_image_plain>
2008 const std::vector<device> &Devs,
2012 std::vector<pi_program> PIPrograms;
2013 PIPrograms.reserve(DeviceImages.size());
2015 PIPrograms.push_back(
getSyclObjImpl(DeviceImage)->get_program_ref());
2017 std::vector<pi_device> PIDevices;
2018 PIDevices.reserve(Devs.size());
2019 for (
const device &Dev : Devs)
2022 std::string LinkOptionsStr;
2024 if (LinkOptionsStr.empty()) {
2026 const std::shared_ptr<device_image_impl> &InputImpl =
2029 *(InputImpl->get_bin_image_ref()));
2038 ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2039 LinkOptionsStr.c_str(), PIPrograms.size(), PIPrograms.data(),
2041 nullptr, &LinkedProg);
2043 if (Error != PI_SUCCESS) {
2045 const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2051 std::shared_ptr<std::vector<kernel_id>> KernelIDs{
new std::vector<kernel_id>};
2052 std::vector<unsigned char> NewSpecConstBlob;
2055 std::shared_ptr<device_image_impl> DeviceImageImpl =
2059 KernelIDs->insert(KernelIDs->end(),
2060 DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2061 DeviceImageImpl->get_kernel_ids_ptr()->end());
2067 const std::lock_guard<std::mutex> SpecConstLock(
2068 DeviceImageImpl->get_spec_const_data_lock());
2072 for (
const auto &SpecConstIt :
2073 DeviceImageImpl->get_spec_const_data_ref()) {
2074 std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2075 NewSpecConstMap[SpecConstIt.first];
2076 assert(NewDescEntries.empty() &&
2077 "Specialization constant already exists in the map.");
2078 NewDescEntries.reserve(SpecConstIt.second.size());
2080 SpecConstIt.second) {
2082 NewSpecConstDesc.
BlobOffset += NewSpecConstBlob.size();
2083 NewDescEntries.push_back(std::move(NewSpecConstDesc));
2089 NewSpecConstBlob.insert(
2090 NewSpecConstBlob.end(),
2091 DeviceImageImpl->get_spec_const_blob_ref().begin(),
2092 DeviceImageImpl->get_spec_const_blob_ref().end());
2099 std::make_shared<detail::device_image_impl>(
2100 nullptr, Context, Devs, bundle_state::executable,
2101 std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap),
2102 std::move(NewSpecConstBlob));
2106 return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2114 const std::vector<device> &Devs,
2118 const std::shared_ptr<device_image_impl> &InputImpl =
2121 const context Context = InputImpl->get_context();
2129 std::string CompileOpts;
2130 std::string LinkOpts;
2136 SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2139 auto BuildF = [
this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2140 &InputImpl, SpecConsts] {
2146 if (InputImpl->get_bin_image_ref()->getFormat() !=
2149 sycl::runtime_error(
2150 "Creating a program from AOT binary for multiple device is not "
2152 PI_ERROR_INVALID_OPERATION);
2156 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2157 Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2159 if (!DeviceCodeWasInCache &&
2160 InputImpl->get_bin_image_ref()->supportsSpecConstants())
2163 ProgramPtr ProgramManaged(
2164 NativePrg, Plugin.
getPiPlugin().PiFunctionTable.piProgramRelease);
2171 uint32_t DeviceLibReqMask = 0;
2174 DeviceLibReqMask = getDeviceLibReqMask(Img);
2176 ProgramPtr BuiltProgram =
2177 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2183 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2184 NativePrograms[BuiltProgram.get()] = &Img;
2187 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2190 if (!DeviceCodeWasInCache)
2191 PersistentDeviceCodeCache::putItemToDisc(
2192 Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
2194 return BuiltProgram.release();
2197 uint32_t ImgId = Img.getImageID();
2200 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
2201 std::make_pair(
PiDevice, CompileOpts + LinkOpts));
2205 auto GetCachedBuildF = [&Cache, &CacheKey]() {
2210 auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
2211 Cache, GetCachedBuildF, BuildF);
2213 assert(BuildResult !=
nullptr &&
"Invalid build result");
2221 auto CacheOtherDevices = [ResProgram, &Plugin]() {
2228 for (
size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2233 CacheKey.second.first = PiDeviceAdd;
2234 getOrBuild<PiProgramT, compile_program_error>(Cache, GetCachedBuildF,
2237 assert(BuildResult !=
nullptr &&
"Invalid build result");
2246 InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
2247 InputImpl->get_kernel_ids_ptr(), ResProgram,
2248 InputImpl->get_spec_const_data_ref(),
2249 InputImpl->get_spec_const_blob_ref());
2251 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2254 std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
2255 const context &Context,
const std::string &KernelName,
2266 auto BuildF = [&Program, &KernelName, &Ctx] {
2267 PiKernelT *Result =
nullptr;
2279 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2283 auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
2284 Cache, GetCachedBuildF, BuildF);
2286 assert(BuildResult !=
nullptr &&
"Invalid build result");
2287 return std::make_pair(BuildResult->Ptr.load(),
2288 &(BuildResult->MBuildResultMutex));
2293 auto getPropIt = [&Img](
const std::string &PropName) {
2297 PropRange.
begin(), PropRange.
end(),
2299 return (*Prop)->Name == PropName;
2301 return (PropIt == PropRange.
end())
2307 auto AspectsPropIt = getPropIt(
"aspects");
2308 auto ReqdWGSizePropIt = getPropIt(
"reqd_work_group_size");
2310 if (!AspectsPropIt && !ReqdWGSizePropIt)
2314 if (AspectsPropIt) {
2319 while (!Aspects.
empty()) {
2320 aspect Aspect = Aspects.
consume<aspect>();
2322 if (!Dev.
has(Aspect))
2328 if (ReqdWGSizePropIt) {
2333 int ReqdWGSizeAllDimsTotal = 1;
2334 std::vector<int> ReqdWGSizeVec;
2336 while (!ReqdWGSize.
empty()) {
2337 int SingleDimSize = ReqdWGSize.
consume<
int>();
2338 ReqdWGSizeAllDimsTotal *= SingleDimSize;
2339 ReqdWGSizeVec.push_back(SingleDimSize);
2342 if (
static_cast<size_t>(ReqdWGSizeAllDimsTotal) >
2343 Dev.
get_info<info::device::max_work_group_size>())
2347 std::variant<id<1>,
id<2>,
id<3>> MaxWorkItemSizesVariant;
2349 MaxWorkItemSizesVariant =
2352 MaxWorkItemSizesVariant =
2355 MaxWorkItemSizesVariant =
2357 for (
int i = 0; i < Dims; i++) {
2362 if (
static_cast<size_t>(ReqdWGSizeVec[i]) >
2363 std::get<id<1>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2365 }
else if (Dims == 2) {
2366 if (
static_cast<size_t>(ReqdWGSizeVec[i]) >
2367 std::get<id<2>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2370 if (
static_cast<size_t>(ReqdWGSizeVec[i]) >
2371 std::get<id<3>>(MaxWorkItemSizesVariant)[Dims - i - 1])
2383 sycl::detail::ProgramManager::getInstance().addImages(desc);