44 using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
50 static constexpr
char UseSpvEnv[](
"SYCL_USE_KERNEL_SPV");
57 constexpr
char SpecValue = 1;
64 return GlobalHandler::instance().getProgramManager();
69 const unsigned char *Data,
size_t DataLen,
70 const std::vector<pi_device_binary_property> Metadata) {
76 sizeof(NumDevices), &NumDevices,
78 assert(NumDevices > 0 &&
79 "Only a single device is supported for AOT compilation");
86 Context->getHandleRef(), 1 , &
PiDevice, &DataLen, &Data,
87 Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
89 if (BinaryStatus != CL_SUCCESS) {
90 throw runtime_error(
"Creating program with binary failed.", BinaryStatus);
97 const unsigned char *Data,
109 bool JITCompilationIsRequired) {
111 std::cerr <<
">>> ProgramManager::getDeviceImage(" << M <<
", \""
117 return getDeviceImage(M, KSId, Context,
Device, JITCompilationIsRequired);
120 template <
typename ExceptionT,
typename RetT>
126 int State = BuildResult->
State.load();
131 if (BuildResult->
Error.isFilledIn()) {
133 throw ExceptionT(Error.
Msg, Error.
Code);
136 RetT *Result = BuildResult->
Ptr.load();
160 template <
typename RetT,
typename ExceptionT,
typename KeyT,
typename AcquireFT,
161 typename GetCacheFT,
typename BuildFT>
164 GetCacheFT &&GetCache, BuildFT &&Build) {
165 bool InsertionTookPlace;
169 auto LockedCache = Acquire(KPCache);
170 auto &Cache = GetCache(LockedCache);
172 Cache.emplace(std::piecewise_construct, std::forward_as_tuple(CacheKey),
175 InsertionTookPlace = Inserted.second;
176 BuildResult = &Inserted.first->second;
181 if (!InsertionTookPlace) {
183 RetT *Result = waitUntilBuilt<ExceptionT>(KPCache, BuildResult);
193 if (BuildResult->
State.compare_exchange_strong(Expected, Desired))
200 RetT *Desired = Build();
203 RetT *Expected =
nullptr;
205 if (!BuildResult->
Ptr.compare_exchange_strong(Expected, Desired))
207 assert(
false &&
"We've build an entity that is already have been built.");
209 BuildResult->
Ptr.store(Desired);
233 std::rethrow_exception(std::current_exception());
242 std::rethrow_exception(std::current_exception());
257 if (ContextBackend == backend::ext_oneapi_cuda)
263 for (
const device &D : Devices) {
264 if (!D.get_info<info::device::is_compiler_available>())
269 if (ContextBackend == backend::opencl) {
271 if (ver.find(
"OpenCL 1.0") == std::string::npos &&
272 ver.find(
"OpenCL 1.1") == std::string::npos &&
273 ver.find(
"OpenCL 1.2") == std::string::npos &&
274 ver.find(
"OpenCL 2.0") == std::string::npos)
278 for (
const device &D : Devices) {
281 std::vector<std::string> Extensions =
282 D.get_info<info::device::extensions>();
283 if (Extensions.end() ==
284 std::find(Extensions.begin(), Extensions.end(),
"cl_khr_il_program"))
302 assert(
false &&
"Unknown device image format");
310 std::cerr <<
">>> ProgramManager::createPIProgram(" << &Img <<
", "
317 throw runtime_error(
"Malformed device program image descriptor",
318 PI_ERROR_INVALID_VALUE);
321 throw runtime_error(
"Invalid device program image: size is zero",
322 PI_ERROR_INVALID_VALUE);
324 size_t ImgSize = Img.
getSize();
341 "SPIR-V online compilation is not supported in this context",
342 PI_ERROR_INVALID_OPERATION);
346 std::vector<pi_device_binary_property> ProgMetadataVector{
347 ProgMetadata.
begin(), ProgMetadata.end()};
354 ImgSize, ProgMetadataVector);
357 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
359 NativePrograms[Res] = &Img;
363 std::cerr <<
"created program: " << Res
375 if (TemporaryStr !=
nullptr) {
376 if (!LinkOpts.empty())
378 LinkOpts += std::string(TemporaryStr);
384 const char *PropName) {
394 static const char *CompileOptsEnv =
398 if (!CompileOptsEnv) {
399 if (!CompileOpts.empty())
402 if (TemporaryStr !=
nullptr)
403 CompileOpts += std::string(TemporaryStr);
406 bool isDoubleGRFEsimdImage =
408 assert((!isDoubleGRFEsimdImage || isEsimdImage) &&
409 "doubleGRF applies only to ESIMD binary images");
413 if (!CompileOpts.empty())
415 CompileOpts +=
"-vc-codegen";
419 CompileOpts +=
" -disable-finalizer-msg";
421 if (isDoubleGRFEsimdImage) {
422 assert(!CompileOpts.empty());
423 CompileOpts +=
" -doubleGRF";
428 std::string &LinkOpts,
437 static const char *CompileOptsEnv =
439 if (CompileOptsEnv) {
440 CompileOpts = CompileOptsEnv;
449 LinkOpts = LinkOptsEnv;
454 std::string &LinkOpts) {
460 std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
462 const device &
Device,
const std::string &CompileAndLinkOptions,
466 auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
467 Device, Img, SpecConsts, CompileAndLinkOptions);
468 if (BinProg.size()) {
471 std::vector<pi_device_binary_property> ProgMetadataVector{
472 ProgMetadata.
begin(), ProgMetadata.end()};
476 (
const unsigned char *)BinProg[0].data(),
477 BinProg[0].size(), ProgMetadataVector);
479 NativePrg = createPIProgram(Img, Context,
Device);
481 return {NativePrg, BinProg.size()};
489 std::string ProgramBuildLog =
490 ProgramManager::getProgramBuildLog(Prog, Context);
491 std::clog << ProgramBuildLog << std::endl;
497 const DeviceImplPtr &DeviceImpl,
const std::string &KernelName,
498 const program_impl *Prg,
bool JITCompilationIsRequired) {
512 return LockedCache.get();
515 std::string CompileOpts;
516 std::string LinkOpts;
530 while (!RootDevImpl->isRootDevice()) {
532 RootDevImpl->get_info<info::device::parent_device>());
534 if (!ContextImpl->hasDevice(ParentDev))
536 RootDevImpl = ParentDev;
542 sizeof(
pi_bool), &MustBuildOnSubdevice,
nullptr);
545 (MustBuildOnSubdevice ==
PI_TRUE) ? DeviceImpl : RootDevImpl;
546 auto BuildF = [
this, &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
547 &LinkOpts, &JITCompilationIsRequired, SpecConsts] {
548 auto Context = createSyclObjFromImpl<context>(ContextImpl);
549 auto Device = createSyclObjFromImpl<device>(Dev);
552 getDeviceImage(M, KSId, Context,
Device, JITCompilationIsRequired);
557 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
558 Img, Context,
Device, CompileOpts + LinkOpts, SpecConsts);
560 if (!DeviceCodeWasInCache) {
562 flushSpecConstants(*Prg, NativePrg, &Img);
567 ProgramPtr ProgramManaged(
576 uint32_t DeviceLibReqMask = 0;
577 if (!DeviceCodeWasInCache &&
580 DeviceLibReqMask = getDeviceLibReqMask(Img);
582 ProgramPtr BuiltProgram =
583 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
589 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
590 NativePrograms[BuiltProgram.get()] = &Img;
594 if (!DeviceCodeWasInCache)
595 PersistentDeviceCodeCache::putItemToDisc(
596 Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.
get());
597 return BuiltProgram.release();
602 auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
604 std::make_pair(std::make_pair(std::move(SpecConsts), KSId),
605 std::make_pair(
PiDevice, CompileOpts + LinkOpts)),
606 AcquireF, GetF, BuildF);
608 assert(BuildResult !=
nullptr &&
"Invalid build result");
609 return BuildResult->Ptr.load();
612 std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
616 const std::string &KernelName,
619 std::cerr <<
">>> ProgramManager::getOrCreateKernel(" << M <<
", "
620 << ContextImpl.get() <<
", " << DeviceImpl.get() <<
", "
621 << KernelName <<
")\n";
630 std::string CompileOpts, LinkOpts;
640 CompileOpts + LinkOpts, KernelName);
642 if (std::get<0>(ret_tuple))
646 getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
653 return LockedCache.get()[Program];
655 auto BuildF = [&Program, &KernelName, &ContextImpl] {
656 PiKernelT *Result =
nullptr;
660 Program, KernelName.c_str(), &Result);
670 auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
671 Cache, KernelName, AcquireF, GetF, BuildF);
673 assert(BuildResult !=
nullptr &&
"Invalid build result");
675 &(BuildResult->MBuildResultMutex), Program);
690 std::string ProgramManager::getProgramBuildLog(
const RT::PiProgram &Program,
692 size_t PIDevicesSize = 0;
695 nullptr, &PIDevicesSize);
696 std::vector<RT::PiDevice> PIDevices(PIDevicesSize /
sizeof(
RT::PiDevice));
698 PIDevicesSize, PIDevices.data(),
700 std::string Log =
"The program was built for " +
701 std::to_string(PIDevices.size()) +
" devices";
703 std::string DeviceBuildInfoString;
704 size_t DeviceBuildInfoStrSize = 0;
707 &DeviceBuildInfoStrSize);
708 if (DeviceBuildInfoStrSize > 0) {
709 std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
712 DeviceBuildInfo.data(),
nullptr);
713 DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
716 std::string DeviceNameString;
717 size_t DeviceNameStrSize = 0;
719 nullptr, &DeviceNameStrSize);
720 if (DeviceNameStrSize > 0) {
721 std::vector<char> DeviceName(DeviceNameStrSize);
724 DeviceName.data(),
nullptr);
725 DeviceNameString = std::string(DeviceName.data());
727 Log +=
"\nBuild program log for '" + DeviceNameString +
"':\n" +
728 DeviceBuildInfoString;
738 std::string LibSyclDir = OSUtil::getCurrentDSODir();
739 std::ifstream File(LibSyclDir + OSUtil::DirSep + Name,
740 std::ifstream::in | std::ifstream::binary);
745 File.seekg(0, std::ios::end);
746 size_t FileSize = File.tellg();
747 File.seekg(0, std::ios::beg);
748 std::vector<char> FileContent(FileSize);
749 File.read(&FileContent[0], FileSize);
754 return Prog !=
nullptr;
759 case DeviceLibExt::cl_intel_devicelib_assert:
760 return "libsycl-fallback-cassert.spv";
761 case DeviceLibExt::cl_intel_devicelib_math:
762 return "libsycl-fallback-cmath.spv";
763 case DeviceLibExt::cl_intel_devicelib_math_fp64:
764 return "libsycl-fallback-cmath-fp64.spv";
765 case DeviceLibExt::cl_intel_devicelib_complex:
766 return "libsycl-fallback-complex.spv";
767 case DeviceLibExt::cl_intel_devicelib_complex_fp64:
768 return "libsycl-fallback-complex-fp64.spv";
769 case DeviceLibExt::cl_intel_devicelib_cstring:
770 return "libsycl-fallback-cstring.spv";
771 case DeviceLibExt::cl_intel_devicelib_imf:
772 return "libsycl-fallback-imf.spv";
773 case DeviceLibExt::cl_intel_devicelib_imf_fp64:
774 return "libsycl-fallback-imf-fp64.spv";
777 PI_ERROR_INVALID_OPERATION);
782 case DeviceLibExt::cl_intel_devicelib_assert:
783 return "cl_intel_devicelib_assert";
784 case DeviceLibExt::cl_intel_devicelib_math:
785 return "cl_intel_devicelib_math";
786 case DeviceLibExt::cl_intel_devicelib_math_fp64:
787 return "cl_intel_devicelib_math_fp64";
788 case DeviceLibExt::cl_intel_devicelib_complex:
789 return "cl_intel_devicelib_complex";
790 case DeviceLibExt::cl_intel_devicelib_complex_fp64:
791 return "cl_intel_devicelib_complex_fp64";
792 case DeviceLibExt::cl_intel_devicelib_cstring:
793 return "cl_intel_devicelib_cstring";
794 case DeviceLibExt::cl_intel_devicelib_imf:
795 return "cl_intel_devicelib_imf";
796 case DeviceLibExt::cl_intel_devicelib_imf_fp64:
797 return "cl_intel_devicelib_imf_fp64";
800 PI_ERROR_INVALID_OPERATION);
809 auto LockedCache = Context->acquireCachedLibPrograms();
810 auto CachedLibPrograms = LockedCache.get();
811 auto CacheResult = CachedLibPrograms.emplace(
812 std::make_pair(std::make_pair(Extension,
Device),
nullptr));
813 bool Cached = !CacheResult.second;
814 auto LibProgIt = CacheResult.first;
821 CachedLibPrograms.erase(LibProgIt);
823 PI_ERROR_INVALID_VALUE);
835 "", 0,
nullptr,
nullptr,
nullptr,
nullptr);
836 if (Error != PI_SUCCESS) {
837 CachedLibPrograms.erase(LibProgIt);
839 ProgramManager::getProgramBuildLog(LibProg, Context), Error);
845 ProgramManager::ProgramManager() {
846 const char *SpvFile = std::getenv(
UseSpvEnv);
853 std::ifstream File(SpvFile, std::ios::binary);
856 throw runtime_error(std::string(
"Can't open file specified via ") +
858 PI_ERROR_INVALID_VALUE);
859 File.seekg(0, std::ios::end);
860 size_t Size = File.tellg();
861 std::unique_ptr<char[]> Data(
new char[Size]);
863 File.read(Data.get(), Size);
866 throw runtime_error(std::string(
"read from ") + SpvFile +
867 std::string(
" failed"),
868 PI_ERROR_INVALID_VALUE);
869 auto ImgPtr = make_unique_ptr<DynRTDeviceBinaryImage>(
870 std::move(Data), Size, OSUtil::DummyModuleHandle);
873 std::cerr <<
"loaded device image binary from " << SpvFile <<
"\n";
874 std::cerr <<
"format: " <<
getFormatStr(ImgPtr->getFormat()) <<
"\n";
880 new std::vector<RTDeviceBinaryImageUPtr>());
881 m_DeviceImages[
SpvFileKSId]->push_back(std::move(ImgPtr));
888 bool JITCompilationIsRequired) {
890 std::cerr <<
">>> ProgramManager::getDeviceImage(" << M <<
", \"" << KSId
895 std::cerr <<
"available device images:\n";
896 debugPrintBinaryImages();
898 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
899 std::vector<RTDeviceBinaryImageUPtr> &Imgs = *m_DeviceImages[KSId];
910 std::vector<pi_device_binary> RawImgs(Imgs.size());
911 for (
unsigned I = 0; I < Imgs.size(); I++)
918 if (JITCompilationIsRequired) {
928 PI_ERROR_INVALID_OPERATION);
932 Img = Imgs[ImgInd].
get();
935 std::cerr <<
"selected device image: " << &Img->
getRawData() <<
"\n";
943 0x1 << (
static_cast<uint32_t
>(Ext) -
944 static_cast<uint32_t
>(DeviceLibExt::cl_intel_devicelib_assert));
945 return ((DeviceLibReqMask & Mask) == Mask);
948 static std::vector<RT::PiProgram>
950 uint32_t DeviceLibReqMask) {
951 std::vector<RT::PiProgram> Programs;
953 std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
954 {DeviceLibExt::cl_intel_devicelib_assert,
956 {DeviceLibExt::cl_intel_devicelib_math,
false},
957 {DeviceLibExt::cl_intel_devicelib_math_fp64,
false},
958 {DeviceLibExt::cl_intel_devicelib_complex,
false},
959 {DeviceLibExt::cl_intel_devicelib_complex_fp64,
false},
960 {DeviceLibExt::cl_intel_devicelib_cstring,
false},
961 {DeviceLibExt::cl_intel_devicelib_imf,
false},
962 {DeviceLibExt::cl_intel_devicelib_imf_fp64,
false}};
966 std::string DevExtList =
968 Device, Context->getPlugin());
969 const bool fp64Support = (DevExtList.npos != DevExtList.find(
"cl_khr_fp64"));
973 for (
auto &Pair : RequiredDeviceLibExt) {
975 bool &FallbackIsLoaded = Pair.second;
977 if (FallbackIsLoaded) {
984 if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 ||
985 Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) &&
992 bool InhibitNativeImpl =
false;
993 if (
const char *Env = getenv(
"SYCL_DEVICELIB_INHIBIT_NATIVE")) {
994 InhibitNativeImpl = strstr(Env, ExtStr) !=
nullptr;
997 bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr);
999 if (!DeviceSupports || InhibitNativeImpl) {
1001 FallbackIsLoaded =
true;
1007 ProgramManager::ProgramPtr
1009 const std::string &CompileOptions,
1010 const std::string &LinkOptions,
1014 std::cerr <<
">>> ProgramManager::build(" << Program.get() <<
", "
1015 << CompileOptions <<
", " << LinkOptions <<
", ... " <<
Device
1028 DeviceLibReqMask &= 0xFFFFFFFE;
1029 bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1034 if (CompileOptions.find(std::string(
"-cmc")) != std::string::npos ||
1035 CompileOptions.find(std::string(
"-vc-codegen")) != std::string::npos)
1036 LinkDeviceLibs =
false;
1038 std::vector<RT::PiProgram> LinkPrograms;
1039 if (LinkDeviceLibs) {
1043 static const char *ForceLinkEnv = std::getenv(
"SYCL_FORCE_LINK");
1044 static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv ==
'1');
1046 const detail::plugin &Plugin = Context->getPlugin();
1047 if (LinkPrograms.empty() && !ForceLink) {
1048 const std::string &Options = LinkOptions.empty()
1050 : (CompileOptions +
" " + LinkOptions);
1052 Program.get(), 1, &
Device, Options.c_str(),
nullptr,
1054 if (Error != PI_SUCCESS)
1062 &
Device, CompileOptions.c_str(), 0,
1063 nullptr,
nullptr,
nullptr,
nullptr);
1064 LinkPrograms.push_back(Program.get());
1068 Context->getHandleRef(), 1, &
Device, LinkOptions.c_str(),
1069 LinkPrograms.size(), LinkPrograms.data(),
nullptr,
nullptr, &LinkedProg);
1073 Program.reset(LinkedProg);
1074 if (Error != PI_SUCCESS) {
1081 Plugin.checkPiResult(Error);
1086 static ProgramManager::KernelArgMask
1088 const int NBytesForSize = 8;
1089 const int NBitsInElement = 8;
1090 std::uint64_t SizeInBits = 0;
1091 for (
int I = 0; I < NBytesForSize; ++I)
1092 SizeInBits |=
static_cast<std::uint64_t
>(Bytes[I]) << I * NBitsInElement;
1095 for (std::uint64_t I = 0; I < SizeInBits; ++I) {
1096 std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
1097 Result.push_back(Byte & (1 << (I % NBitsInElement)));
1108 for (
const auto &Prop : AssertUsedRange) {
1109 KernelNameWithOSModule Key{Prop->Name, M};
1110 m_KernelUsesAssert.insert(Key);
1115 const std::string &KernelName)
const {
1116 KernelNameWithOSModule Key{KernelName, M};
1117 return m_KernelUsesAssert.find(Key) != m_KernelUsesAssert.
end();
1121 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1122 const bool DumpImages = std::getenv(
"SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1129 auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
1135 KernelNameToArgMaskMap &ArgMaskMap =
1136 m_EliminatedKernelArgMasks[Img.
get()];
1137 for (
const auto &Info : KPOIRange)
1138 ArgMaskMap[Info->Name] =
1143 if (EntriesB != EntriesE) {
1144 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1149 m_ExportedSymbols.insert(ExportedSymbol->Name);
1151 m_BinImg2KernelIDs[Img.
get()].reset(
new std::vector<kernel_id>);
1160 if (std::strstr(EntriesIt->name,
"__sycl_service_kernel__")) {
1161 m_ServiceKernels.insert(EntriesIt->name);
1168 if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1172 auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1173 if (It == m_KernelName2KernelIDs.end()) {
1174 std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1175 std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1177 detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1179 It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name,
1183 m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.
get()));
1184 m_BinImg2KernelIDs[Img.
get()]->push_back(It->second);
1188 std::sort(m_BinImg2KernelIDs[Img.
get()]->begin(),
1195 if (EntriesB != EntriesE) {
1198 StrToKSIdMap &KSIdMap = m_KernelSets[M];
1199 auto KSIdIt = KSIdMap.find(EntriesB->
name);
1200 if (KSIdIt != KSIdMap.end()) {
1203 assert(KSIdMap[EntriesIt->name] == KSIdIt->second &&
1204 "Kernel sets are not disjoint");
1205 auto &Imgs = m_DeviceImages[KSIdIt->second];
1206 assert(Imgs &&
"Device image vector should have been already created");
1208 cacheKernelUsesAssertInfo(M, *Img);
1210 Imgs->push_back(std::move(Img));
1216 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1220 KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
1225 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1237 assert(DeviceGlobalInfo.
size() == 16 &&
"Unexpected property size");
1238 const std::uint32_t TypeSize =
1239 *
reinterpret_cast<const std::uint32_t *
>(&DeviceGlobalInfo[8]);
1240 const std::uint32_t DeviceImageScopeDecorated =
1241 *
reinterpret_cast<const std::uint32_t *
>(&DeviceGlobalInfo[12]);
1243 auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1244 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1246 ExistingDeviceGlobal->second->initialize(TypeSize,
1247 DeviceImageScopeDecorated);
1252 auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1253 DeviceGlobal->Name, TypeSize, DeviceImageScopeDecorated);
1254 m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1258 m_DeviceImages[KSId].reset(
new std::vector<RTDeviceBinaryImageUPtr>());
1259 cacheKernelUsesAssertInfo(M, *Img);
1262 dumpImage(*Img, KSId);
1263 m_DeviceImages[KSId]->push_back(std::move(Img));
1271 KSId = getNextKernelSetId();
1273 auto &Imgs = m_DeviceImages[KSId];
1275 Imgs.reset(
new std::vector<RTDeviceBinaryImageUPtr>());
1277 cacheKernelUsesAssertInfo(M, *Img);
1280 dumpImage(*Img, KSId);
1281 Imgs->push_back(std::move(Img));
1285 void ProgramManager::debugPrintBinaryImages()
const {
1286 for (
const auto &ImgVecIt : m_DeviceImages) {
1287 std::cerr <<
" ++++++ Kernel set: " << ImgVecIt.first <<
"\n";
1288 for (
const auto &Img : *ImgVecIt.second)
1293 KernelSetId ProgramManager::getNextKernelSetId()
const {
1301 const std::string &KernelName)
const {
1304 if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1306 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1307 auto KSIdMapIt = m_KernelSets.find(M);
1308 if (KSIdMapIt != m_KernelSets.end()) {
1309 const StrToKSIdMap &KSIdMap = KSIdMapIt->second;
1310 auto KSIdIt = KSIdMap.find(KernelName);
1312 if (KSIdIt != KSIdMap.end())
1313 return KSIdIt->second;
1317 auto ModuleKSIdIt = m_OSModuleKernelSets.find(M);
1318 if (ModuleKSIdIt != m_OSModuleKernelSets.end())
1319 return ModuleKSIdIt->second;
1321 throw runtime_error(
"No kernel named " + KernelName +
" was found",
1322 PI_ERROR_INVALID_KERNEL_NAME);
1325 void ProgramManager::dumpImage(
const RTDeviceBinaryImage &Img,
1327 std::string Fname(
"sycl_");
1330 Fname += std::to_string(KSId);
1342 std::ofstream F(Fname, std::ios::binary);
1345 throw runtime_error(
"Can not write " + Fname, PI_ERROR_UNKNOWN);
1355 std::cerr <<
">>> ProgramManager::flushSpecConstants(" << Prg.
get()
1362 assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1363 NativePrg = NativePrg ? NativePrg : PrgHandle;
1368 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1369 auto It = NativePrograms.find(NativePrg);
1370 if (It == NativePrograms.end())
1372 "spec constant is set in a program w/o a binary image",
1373 PI_ERROR_INVALID_OPERATION);
1378 std::cerr <<
">>> ProgramManager::flushSpecConstants: binary image "
1379 << &Img->
getRawData() <<
" doesn't support spec constants\n";
1407 if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1411 if (m_EliminatedKernelArgMasks.empty())
1415 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1416 auto ImgIt = NativePrograms.find(NativePrg);
1417 if (ImgIt != NativePrograms.end()) {
1418 auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1419 if (MapIt != m_EliminatedKernelArgMasks.end())
1420 return MapIt->second[KernelName];
1427 for (
auto &Elem : m_EliminatedKernelArgMasks) {
1428 auto ArgMask = Elem.second.find(KernelName);
1429 if (ArgMask != Elem.second.end())
1430 return ArgMask->second;
1438 auto IsAOTBinary = [](
const char *Format) {
1450 return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1455 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1457 auto &Plugin = DeviceImpl->getPlugin();
1459 const RT::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef();
1468 PIDeviceHandle, &DevBin,
1470 if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1471 throw runtime_error(
"Invalid binary image or device",
1472 PI_ERROR_INVALID_VALUE);
1474 return (0 == SuitableImageID);
1477 kernel_id ProgramManager::getSYCLKernelID(
const std::string &KernelName) {
1478 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1480 auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1481 if (KernelID == m_KernelName2KernelIDs.end())
1482 throw runtime_error(
"No kernel found with the specified name",
1483 PI_ERROR_INVALID_KERNEL_NAME);
1485 return KernelID->second;
1488 bool ProgramManager::hasCompatibleImage(
const device &Dev) {
1489 std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1492 m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1494 std::shared_ptr<std::vector<kernel_id>>>
1495 Elem) { return compatibleWithDevice(Elem.first, Dev); });
1498 std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
1499 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1501 std::vector<sycl::kernel_id> AllKernelIDs;
1502 AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1503 for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1504 AllKernelIDs.push_back(KernelID.second);
1506 return AllKernelIDs;
1509 kernel_id ProgramManager::getBuiltInKernelID(
const std::string &KernelName) {
1510 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1512 auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1513 if (KernelID == m_BuiltInKernelIDs.end()) {
1514 auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1515 auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1516 KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1519 return KernelID->second;
1522 void ProgramManager::addOrInitDeviceGlobalEntry(
const void *DeviceGlobalPtr,
1523 const char *UniqueId) {
1524 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1526 auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1527 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1529 ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1530 m_Ptr2DeviceGlobal.insert(
1531 {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1536 std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1537 auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1538 m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1541 std::vector<device_image_plain>
1542 ProgramManager::getSYCLDeviceImagesWithCompatibleState(
1543 const context &Ctx,
const std::vector<device> &Devs,
1544 bundle_state TargetState,
const std::vector<kernel_id> &KernelIDs) {
1548 std::set<RTDeviceBinaryImage *> BinImages;
1549 if (!KernelIDs.empty()) {
1550 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1551 for (
const kernel_id &KID : KernelIDs) {
1552 auto Range = m_KernelIDs2BinImage.equal_range(KID);
1553 for (
auto It = Range.first, End = Range.second; It != End; ++It)
1554 BinImages.insert(It->second);
1557 std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
1558 for (
auto &ImagesSets : m_DeviceImages) {
1559 auto &ImagesUPtrs = *ImagesSets.second.get();
1560 for (
auto &ImageUPtr : ImagesUPtrs)
1561 BinImages.insert(ImageUPtr.get());
1564 assert(BinImages.size() > 0 &&
"Expected to find at least on device image");
1566 std::vector<device_image_plain> SYCLDeviceImages;
1579 if (ImgState > TargetState)
1586 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1589 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1590 KernelIDs = m_BinImg2KernelIDs[BinImage];
1592 if (!KernelIDs || KernelIDs->empty())
1597 BinImage, Ctx, Devs, ImgState, KernelIDs,
nullptr);
1599 SYCLDeviceImages.push_back(
1600 createSyclObjFromImpl<device_image_plain>(Impl));
1605 return SYCLDeviceImages;
1608 void ProgramManager::bringSYCLDeviceImagesToState(
1609 std::vector<device_image_plain> &DeviceImages,
bundle_state TargetState) {
1614 switch (TargetState) {
1615 case bundle_state::input:
1617 assert(DevImageState == bundle_state::input);
1619 case bundle_state::object:
1620 if (DevImageState == bundle_state::input) {
1626 assert(DevImageState == bundle_state::object);
1628 case bundle_state::executable: {
1629 switch (DevImageState) {
1630 case bundle_state::input:
1634 case bundle_state::object: {
1635 std::vector<device_image_plain> LinkedDevImages =
1640 assert(LinkedDevImages.size() == 1 &&
"Expected one linked image here");
1641 DevImage = LinkedDevImages[0];
1644 case bundle_state::executable:
1655 std::vector<device_image_plain>
1656 ProgramManager::getSYCLDeviceImages(
const context &Ctx,
1657 const std::vector<device> &Devs,
1660 std::vector<device_image_plain> DeviceImages =
1661 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1663 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1664 return DeviceImages;
1667 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1668 const context &Ctx,
const std::vector<device> &Devs,
1671 std::vector<device_image_plain> DeviceImages =
1672 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1675 auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1677 return !Selector(getSyclObjImpl(Image));
1679 DeviceImages.erase(It, DeviceImages.end());
1683 return DeviceImages;
1686 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1687 const context &Ctx,
const std::vector<device> &Devs,
1688 const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState) {
1690 if (KernelIDs.empty())
1694 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1696 for (
auto &It : m_BuiltInKernelIDs) {
1697 if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1700 "Attempting to use a built-in kernel. They are "
1701 "not fully supported");
1706 std::vector<device_image_plain> DeviceImages =
1707 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1710 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1711 return DeviceImages;
1716 const std::vector<device> &Devs,
1723 const std::shared_ptr<device_image_impl> &InputImpl =
1730 if (InputImpl->get_bin_image_ref()->getFormat() !=
1733 sycl::runtime_error(
1734 "Creating a program from AOT binary for multiple device is not "
1736 PI_ERROR_INVALID_OPERATION);
1740 RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
1741 InputImpl->get_context(), Devs[0]);
1743 if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
1747 InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
1748 bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog,
1749 InputImpl->get_spec_const_data_ref(),
1750 InputImpl->get_spec_const_blob_ref());
1752 std::vector<pi_device> PIDevices;
1753 PIDevices.reserve(Devs.size());
1754 for (
const device &Dev : Devs)
1760 std::string CompileOptions;
1763 *(InputImpl->get_bin_image_ref()));
1765 ObjectImpl->get_program_ref(), Devs.size(),
1766 PIDevices.data(), CompileOptions.c_str(),
1770 if (Error != PI_SUCCESS)
1773 getProgramBuildLog(ObjectImpl->get_program_ref(),
1776 return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
1779 std::vector<device_image_plain>
1781 const std::vector<device> &Devs,
1785 std::vector<pi_program> PIPrograms;
1786 PIPrograms.reserve(DeviceImages.size());
1788 PIPrograms.push_back(
getSyclObjImpl(DeviceImage)->get_program_ref());
1790 std::vector<pi_device> PIDevices;
1791 PIDevices.reserve(Devs.size());
1792 for (
const device &Dev : Devs)
1795 std::string LinkOptionsStr;
1797 if (LinkOptionsStr.empty()) {
1799 const std::shared_ptr<device_image_impl> &InputImpl =
1802 *(InputImpl->get_bin_image_ref()));
1811 ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
1812 LinkOptionsStr.c_str(), PIPrograms.size(), PIPrograms.data(),
1814 nullptr, &LinkedProg);
1816 if (Error != PI_SUCCESS) {
1818 const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
1824 std::shared_ptr<std::vector<kernel_id>> KernelIDs{
new std::vector<kernel_id>};
1836 std::make_shared<detail::device_image_impl>(
1837 nullptr, Context, Devs, bundle_state::executable,
1838 std::move(KernelIDs), LinkedProg);
1842 return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
1850 const std::vector<device> &Devs,
1854 const std::shared_ptr<device_image_impl> &InputImpl =
1857 const context Context = InputImpl->get_context();
1870 return LockedCache.get();
1873 std::string CompileOpts;
1874 std::string LinkOpts;
1880 SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
1883 auto BuildF = [
this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
1884 &InputImpl, SpecConsts] {
1890 if (InputImpl->get_bin_image_ref()->getFormat() !=
1893 sycl::runtime_error(
1894 "Creating a program from AOT binary for multiple device is not "
1896 PI_ERROR_INVALID_OPERATION);
1900 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
1901 Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
1903 if (!DeviceCodeWasInCache &&
1904 InputImpl->get_bin_image_ref()->supportsSpecConstants()) {
1907 std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
1908 const std::map<std::string,
1909 std::vector<device_image_impl::SpecConstDescT>>
1910 &SpecConstData = InputImpl->get_spec_const_data_ref();
1912 for (
const auto &DescPair : SpecConstData) {
1915 if (SpecIDDesc.
IsSet) {
1917 NativePrg, SpecIDDesc.
ID, SpecIDDesc.
Size,
1924 ProgramPtr ProgramManaged(
1932 uint32_t DeviceLibReqMask = 0;
1935 DeviceLibReqMask = getDeviceLibReqMask(Img);
1937 ProgramPtr BuiltProgram =
1938 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
1944 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1945 NativePrograms[BuiltProgram.get()] = &Img;
1949 if (!DeviceCodeWasInCache)
1950 PersistentDeviceCodeCache::putItemToDisc(
1951 Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
1953 return BuiltProgram.release();
1958 auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
1960 std::make_pair(std::make_pair(std::move(SpecConsts), (
size_t)ImgPtr),
1961 std::make_pair(
PiDevice, CompileOpts + LinkOpts)),
1962 AcquireF, GetF, BuildF);
1964 assert(BuildResult !=
nullptr &&
"Invalid build result");
1972 auto CacheOtherDevices = [ResProgram, &Plugin]() {
1979 for (
size_t Idx = 1; Idx < Devs.size(); ++Idx) {
1983 getOrBuild<PiProgramT, compile_program_error>(
1985 std::make_pair(std::make_pair(std::move(SpecConsts), (
size_t)ImgPtr),
1986 std::make_pair(PiDeviceAdd, CompileOpts + LinkOpts)),
1987 AcquireF, GetF, CacheOtherDevices);
1989 assert(BuildResult !=
nullptr &&
"Invalid build result");
1998 InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable,
1999 InputImpl->get_kernel_ids_ptr(), ResProgram,
2000 InputImpl->get_spec_const_data_ref(),
2001 InputImpl->get_spec_const_blob_ref());
2003 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2006 std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
2007 const context &Context,
const std::string &KernelName,
2025 return LockedCache.get()[Program];
2027 auto BuildF = [&Program, &KernelName, &Ctx] {
2028 PiKernelT *Result =
nullptr;
2040 auto BuildResult = getOrBuild<PiKernelT, invalid_object_error>(
2041 Cache, KernelName, AcquireF, GetF, BuildF);
2043 assert(BuildResult !=
nullptr &&
"Invalid build result");
2044 return std::make_pair(BuildResult->Ptr.load(),
2045 &(BuildResult->MBuildResultMutex));