47 inline namespace _V1 {
50 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
54 static constexpr
char UseSpvEnv[](
"SYCL_USE_KERNEL_SPV");
62 constexpr
char SpecValue = 1;
74 const unsigned char *Data,
size_t DataLen,
75 const std::vector<pi_device_binary_property> Metadata) {
76 const PluginPtr &Plugin = Context->getPlugin();
81 sizeof(NumDevices), &NumDevices,
83 assert(NumDevices > 0 &&
84 "Only a single device is supported for AOT compilation");
92 Context->getHandleRef(), 1 , &
PiDevice, &DataLen, &Data,
93 Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
95 if (BinaryStatus != CL_SUCCESS) {
96 throw runtime_error(
"Creating program with binary failed.", BinaryStatus);
106 const PluginPtr &Plugin = Context->getPlugin();
129 for (
const device &D : Devices) {
130 if (!D.get_info<info::device::is_compiler_available>())
137 if (ver.find(
"OpenCL 1.0") == std::string::npos &&
138 ver.find(
"OpenCL 1.1") == std::string::npos &&
139 ver.find(
"OpenCL 1.2") == std::string::npos &&
140 ver.find(
"OpenCL 2.0") == std::string::npos)
144 for (
const device &D : Devices) {
147 std::vector<std::string> Extensions =
148 D.get_info<info::device::extensions>();
149 if (Extensions.end() ==
150 std::find(Extensions.begin(), Extensions.end(),
"cl_khr_il_program"))
168 assert(
false &&
"Unknown device image format");
176 std::cerr <<
">>> ProgramManager::createPIProgram(" << &Img <<
", "
183 throw runtime_error(
"Malformed device program image descriptor",
184 PI_ERROR_INVALID_VALUE);
187 throw runtime_error(
"Invalid device program image: size is zero",
188 PI_ERROR_INVALID_VALUE);
190 size_t ImgSize = Img.
getSize();
207 sycl::errc::feature_not_supported,
208 "SPIR-V online compilation is not supported in this context");
212 std::vector<pi_device_binary_property> ProgMetadataVector{
213 ProgMetadata.
begin(), ProgMetadata.end()};
224 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
226 NativePrograms[Res] = &Img;
229 Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
244 if (TemporaryStr !=
nullptr) {
245 if (!LinkOpts.empty())
247 LinkOpts += std::string(TemporaryStr);
253 const char *PropName) {
259 const char *PropName) {
261 std::stringstream ss;
265 if (optLevel < 0 || optLevel > 3)
267 ss <<
"-O" << optLevel;
268 std::string temp = ss.str();
282 if (!RegAllocModeProp && !GRFSizeProp)
286 assert(!RegAllocModeProp || !GRFSizeProp);
287 bool Is256GRF =
false;
288 bool IsAutoGRF =
false;
289 if (RegAllocModeProp) {
290 uint32_t RegAllocModePropVal =
292 Is256GRF = RegAllocModePropVal ==
294 IsAutoGRF = RegAllocModePropVal ==
299 Is256GRF = GRFSizePropVal == 256;
300 IsAutoGRF = GRFSizePropVal == 0;
303 if (!CompileOpts.empty())
306 CompileOpts += IsEsimdImage ?
"-doubleGRF" :
"-ze-opt-large-register-file";
309 if (!CompileOpts.empty())
312 CompileOpts +=
"-ze-intel-enable-auto-large-GRF-mode";
318 const std::vector<device> &Devs,
323 static const char *CompileOptsEnv =
327 if (!CompileOptsEnv) {
328 if (!CompileOpts.empty())
331 if (TemporaryStr !=
nullptr)
332 CompileOpts += std::string(TemporaryStr);
338 if (!CompileOpts.empty())
340 CompileOpts +=
"-vc-codegen";
344 CompileOpts +=
" -disable-finalizer-msg";
353 const char *optLevelStr = str.c_str();
358 if (!isEsimdImage && !CompileOptsEnv && optLevelStr !=
nullptr &&
359 optLevelStr[0] !=
'\0') {
361 assert(!Devs.empty() &&
363 return Dev.get_platform() == Devs[0].get_platform();
365 const char *backend_option =
nullptr;
368 PlatformImpl->getBackendOption(optLevelStr, &backend_option);
369 if (backend_option && backend_option[0] !=
'\0') {
370 if (!CompileOpts.empty())
372 CompileOpts += std::string(backend_option);
379 return Dev.is_gpu() &&
380 Dev.get_info<info::device::vendor_id>() == 0x8086;
382 if (!CompileOptsEnv) {
383 static const char *TargetCompileFast =
"-ftarget-compile-fast";
384 if (
auto Pos = CompileOpts.find(TargetCompileFast);
385 Pos != std::string::npos) {
386 const char *BackendOption =
nullptr;
388 PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
389 auto OptLen = strlen(TargetCompileFast);
390 if (IsIntelGPU && BackendOption && BackendOption[0] !=
'\0')
391 CompileOpts.replace(Pos, OptLen, BackendOption);
393 CompileOpts.erase(Pos, OptLen);
395 static const std::string TargetRegisterAllocMode =
396 "-ftarget-register-alloc-mode=";
397 auto OptPos = CompileOpts.find(TargetRegisterAllocMode);
398 while (OptPos != std::string::npos) {
399 auto EndOfOpt = CompileOpts.find(
" ", OptPos);
401 auto OptValue = CompileOpts.substr(
402 OptPos + TargetRegisterAllocMode.size(),
403 EndOfOpt - OptPos - TargetRegisterAllocMode.size());
404 auto ColonPos = OptValue.find(
":");
405 auto Device = OptValue.substr(0, ColonPos);
406 std::string BackendStrToAdd;
410 (Dev.get_info<ext::intel::info::device::device_id>() &
414 if (Device ==
"pvc" && IsPVC)
415 BackendStrToAdd =
" " + OptValue.substr(ColonPos + 1) +
" ";
418 std::string NewCompileOpts =
419 CompileOpts.substr(0, OptPos) + BackendStrToAdd;
421 if (EndOfOpt != std::string::npos)
422 NewCompileOpts += CompileOpts.substr(EndOfOpt);
423 CompileOpts = NewCompileOpts;
424 OptPos = CompileOpts.find(TargetRegisterAllocMode);
431 static const char *AppendCompileOptsEnv =
433 if (AppendCompileOptsEnv) {
434 if (!CompileOpts.empty())
436 CompileOpts += AppendCompileOptsEnv;
440 static const char *AppendLinkOptsEnv =
442 if (AppendLinkOptsEnv) {
443 if (!LinkOpts.empty())
445 LinkOpts += AppendLinkOptsEnv;
450 std::string &LinkOpts,
452 const std::vector<device> &Devices,
461 static const char *CompileOptsEnv =
463 if (CompileOptsEnv) {
464 CompileOpts = CompileOptsEnv;
473 LinkOpts = LinkOptsEnv;
478 std::string &LinkOpts) {
484 std::pair<sycl::detail::pi::PiProgram, bool>
488 const std::string &CompileAndLinkOptions,
493 Device, Img, SpecConsts, CompileAndLinkOptions);
494 if (BinProg.size()) {
497 std::vector<pi_device_binary_property> ProgMetadataVector{
498 ProgMetadata.
begin(), ProgMetadata.end()};
502 (
const unsigned char *)BinProg[0].data(),
503 BinProg[0].size(), ProgMetadataVector);
507 return {NativePrg, BinProg.size()};
515 std::string ProgramBuildLog =
517 std::clog << ProgramBuildLog << std::endl;
525 const std::string &KernelName,
const NDRDescT &NDRDesc,
526 bool JITCompilationIsRequired) {
529 std::string CompileOpts;
530 std::string LinkOpts;
539 while (!RootDevImpl->isRootDevice()) {
541 RootDevImpl->get_info<info::device::parent_device>());
543 if (!ContextImpl->hasDevice(ParentDev))
545 RootDevImpl = ParentDev;
551 sizeof(
pi_bool), &MustBuildOnSubdevice,
nullptr);
554 (MustBuildOnSubdevice ==
PI_TRUE) ? DeviceImpl : RootDevImpl;
555 auto Context = createSyclObjFromImpl<context>(ContextImpl);
556 auto Device = createSyclObjFromImpl<device>(Dev);
558 getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
564 auto BuildF = [
this, &Img, &Context, &ContextImpl, &Device, &CompileOpts,
565 &LinkOpts, SpecConsts] {
566 const PluginPtr &Plugin = ContextImpl->getPlugin();
572 Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
574 if (!DeviceCodeWasInCache) {
579 ProgramPtr ProgramManaged(
580 NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
588 uint32_t DeviceLibReqMask = 0;
589 if (!DeviceCodeWasInCache &&
594 ProgramPtr BuiltProgram =
595 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
601 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
602 NativePrograms[BuiltProgram.get()] = &Img;
605 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
608 if (!DeviceCodeWasInCache)
610 Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.
get());
611 return BuiltProgram.release();
617 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
PiDevice);
619 auto GetCachedBuildF = [&Cache, &CacheKey]() {
627 Cache.
getOrBuild<compile_program_error>(GetCachedBuildF, BuildF);
629 assert(BuildResult !=
nullptr &&
"Invalid build result");
636 return BuildResult->Val;
645 const std::string &KernelName,
648 std::cerr <<
">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
649 <<
", " << DeviceImpl.get() <<
", " << KernelName <<
")\n";
656 std::string CompileOpts, LinkOpts;
665 CompileOpts + LinkOpts, KernelName);
668 constexpr
size_t Kernel = 0;
669 constexpr
size_t Program = 3;
670 if (std::get<Kernel>(ret_tuple)) {
674 std::get<Kernel>(ret_tuple));
676 std::get<Program>(ret_tuple));
684 auto BuildF = [
this, &Program, &KernelName, &ContextImpl] {
687 const PluginPtr &Plugin = ContextImpl->getPlugin();
689 Program, KernelName.c_str(), &Kernel);
692 if (ContextImpl->getPlatformImpl()->supports_usm()) {
702 return std::make_pair(Kernel, ArgMask);
705 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
713 auto [Kernel, ArgMask] = BuildF();
714 return make_tuple(Kernel,
nullptr, ArgMask, Program);
718 Cache.
getOrBuild<invalid_object_error>(GetCachedBuildF, BuildF);
720 assert(BuildResult !=
nullptr &&
"Invalid build result");
721 const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val;
723 &(BuildResult->MBuildResultMutex),
724 KernelArgMaskPair.second, Program);
730 KernelArgMaskPair.first);
739 const PluginPtr &Plugin = Context->getPlugin();
749 size_t PIDevicesSize = 0;
750 const PluginPtr &Plugin = Context->getPlugin();
752 nullptr, &PIDevicesSize);
753 std::vector<sycl::detail::pi::PiDevice> PIDevices(
756 PIDevicesSize, PIDevices.data(),
758 std::string Log =
"The program was built for " +
759 std::to_string(PIDevices.size()) +
" devices";
761 std::string DeviceBuildInfoString;
762 size_t DeviceBuildInfoStrSize = 0;
765 &DeviceBuildInfoStrSize);
766 if (DeviceBuildInfoStrSize > 0) {
767 std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
770 DeviceBuildInfo.data(),
nullptr);
771 DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
774 std::string DeviceNameString;
775 size_t DeviceNameStrSize = 0;
777 nullptr, &DeviceNameStrSize);
778 if (DeviceNameStrSize > 0) {
779 std::vector<char> DeviceName(DeviceNameStrSize);
782 DeviceName.data(),
nullptr);
783 DeviceNameString = std::string(DeviceName.data());
785 Log +=
"\nBuild program log for '" + DeviceNameString +
"':\n" +
786 DeviceBuildInfoString;
798 std::ifstream::in | std::ifstream::binary);
803 File.seekg(0, std::ios::end);
804 size_t FileSize = File.tellg();
805 File.seekg(0, std::ios::beg);
806 std::vector<char> FileContent(FileSize);
807 File.read(&FileContent[0], FileSize);
812 return Prog !=
nullptr;
817 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
820 {
nullptr,
"libsycl-fallback-cassert.spv"}},
822 {
nullptr,
"libsycl-fallback-cmath.spv"}},
824 {
nullptr,
"libsycl-fallback-cmath-fp64.spv"}},
826 {
nullptr,
"libsycl-fallback-complex.spv"}},
828 {
nullptr,
"libsycl-fallback-complex-fp64.spv"}},
830 {
nullptr,
"libsycl-fallback-cstring.spv"}},
832 {
nullptr,
"libsycl-fallback-imf.spv"}},
834 {
nullptr,
"libsycl-fallback-imf-fp64.spv"}},
836 {
nullptr,
"libsycl-fallback-imf-bf16.spv"}},
838 {
"libsycl-native-bfloat16.spv",
"libsycl-fallback-bfloat16.spv"}}};
842 const char *Lib =
nullptr;
844 Lib = Native ? LibPair->second.first : LibPair->second.second;
846 throw compile_program_error(
"Unhandled (new?) device library extension",
847 PI_ERROR_INVALID_OPERATION);
858 "cl_intel_devicelib_math_fp64"},
861 "cl_intel_devicelib_complex_fp64"},
867 "cl_intel_bfloat16_conversions"}};
872 throw compile_program_error(
"Unhandled (new?) device library extension",
873 PI_ERROR_INVALID_OPERATION);
884 auto LockedCache = Context->acquireCachedLibPrograms();
885 auto CachedLibPrograms = LockedCache.get();
886 auto CacheResult = CachedLibPrograms.emplace(
887 std::make_pair(std::make_pair(Extension, Device),
nullptr));
888 bool Cached = !CacheResult.second;
889 auto LibProgIt = CacheResult.first;
896 CachedLibPrograms.erase(LibProgIt);
897 throw compile_program_error(std::string(
"Failed to load ") + LibFileName,
898 PI_ERROR_INVALID_VALUE);
901 const PluginPtr &Plugin = Context->getPlugin();
911 "", 0,
nullptr,
nullptr,
nullptr,
nullptr);
912 if (Error != PI_SUCCESS) {
913 CachedLibPrograms.erase(LibProgIt);
914 throw compile_program_error(
922 const char *SpvFile = std::getenv(
UseSpvEnv);
929 std::ifstream File(SpvFile, std::ios::binary);
932 throw runtime_error(std::string(
"Can't open file specified via ") +
934 PI_ERROR_INVALID_VALUE);
935 File.seekg(0, std::ios::end);
936 size_t Size = File.tellg();
937 std::unique_ptr<char[]> Data(
new char[Size]);
939 File.read(Data.get(), Size);
942 throw runtime_error(std::string(
"read from ") + SpvFile +
943 std::string(
" failed"),
944 PI_ERROR_INVALID_VALUE);
949 std::make_unique<DynRTDeviceBinaryImage>(std::move(Data), Size);
952 std::cerr <<
"loaded device image binary from " << SpvFile <<
"\n";
960 bool JITCompilationIsRequired) {
961 if (!JITCompilationIsRequired)
972 "Recompiling AOT image is not supported");
976 template <
typename StorageKey>
978 const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
979 const StorageKey &Key,
const context &Context,
const device &Device) {
980 auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
981 if (ItBegin == ItEnd)
984 std::vector<pi_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
986 for (
unsigned I = 0; It != ItEnd; ++It, ++I)
997 std::advance(ItBegin, ImgInd);
998 return ItBegin->second;
1001 RTDeviceBinaryImage &
1004 bool JITCompilationIsRequired) {
1006 std::cerr <<
">>> ProgramManager::getDeviceImage(\"" << KernelName <<
"\", "
1008 <<
", " << JITCompilationIsRequired <<
")\n";
1010 std::cerr <<
"available device images:\n";
1015 assert(m_SpvFileImage);
1017 std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1018 Context, Device, JITCompilationIsRequired);
1023 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1024 if (
auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1025 KernelId != m_KernelName2KernelIDs.end()) {
1029 assert(Img &&
"No binary image found for kernel id");
1045 throw runtime_error(
"No kernel named " + KernelName +
" was found",
1046 PI_ERROR_INVALID_KERNEL_NAME);
1050 const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1052 bool JITCompilationIsRequired) {
1053 assert(ImageSet.size() > 0);
1056 std::cerr <<
">>> ProgramManager::getDeviceImage(Custom SPV file "
1058 <<
", " << JITCompilationIsRequired <<
")\n";
1060 std::cerr <<
"available device images:\n";
1064 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1065 std::vector<pi_device_binary> RawImgs(ImageSet.size());
1066 auto ImageIterator = ImageSet.begin();
1067 for (
size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1068 RawImgs[i] =
const_cast<pi_device_binary>(&(*ImageIterator)->getRawData());
1078 ImageIterator = ImageSet.begin();
1079 std::advance(ImageIterator, ImgInd);
1084 std::cerr <<
"selected device image: " << &(*ImageIterator)->getRawData()
1086 (*ImageIterator)->print();
1088 return **ImageIterator;
1093 0x1 << (
static_cast<uint32_t
>(Ext) -
1095 return ((DeviceLibReqMask & Mask) == Mask);
1098 static std::vector<sycl::detail::pi::PiProgram>
1101 uint32_t DeviceLibReqMask) {
1102 std::vector<sycl::detail::pi::PiProgram> Programs;
1104 std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1119 std::string DevExtList =
1120 Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1122 const bool fp64Support = (DevExtList.npos != DevExtList.find(
"cl_khr_fp64"));
1126 for (
auto &Pair : RequiredDeviceLibExt) {
1128 bool &FallbackIsLoaded = Pair.second;
1130 if (FallbackIsLoaded) {
1147 bool InhibitNativeImpl =
false;
1148 if (
const char *Env = getenv(
"SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1149 InhibitNativeImpl = strstr(Env, ExtName) !=
nullptr;
1152 bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1153 if (!DeviceSupports || InhibitNativeImpl) {
1156 FallbackIsLoaded =
true;
1162 FallbackIsLoaded =
true;
1171 const std::string &CompileOptions,
const std::string &LinkOptions,
1175 std::cerr <<
">>> ProgramManager::build(" << Program.get() <<
", "
1176 << CompileOptions <<
", " << LinkOptions <<
", ... " << Device
1180 bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1185 if (CompileOptions.find(std::string(
"-cmc")) != std::string::npos ||
1186 CompileOptions.find(std::string(
"-vc-codegen")) != std::string::npos)
1187 LinkDeviceLibs =
false;
1189 std::vector<sycl::detail::pi::PiProgram> LinkPrograms;
1190 if (LinkDeviceLibs) {
1194 static const char *ForceLinkEnv = std::getenv(
"SYCL_FORCE_LINK");
1195 static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv ==
'1');
1197 const PluginPtr &Plugin = Context->getPlugin();
1198 if (LinkPrograms.empty() && !ForceLink) {
1199 const std::string &Options = LinkOptions.empty()
1201 : (CompileOptions +
" " + LinkOptions);
1204 Program.get(), 1, &Device, Options.c_str(),
1206 if (Error != PI_SUCCESS)
1214 &Device, CompileOptions.c_str(), 0,
1215 nullptr,
nullptr,
nullptr,
nullptr);
1216 LinkPrograms.push_back(Program.get());
1221 Context->getHandleRef(), 1, &Device,
1222 LinkOptions.c_str(), LinkPrograms.size(), LinkPrograms.data(),
nullptr,
1223 nullptr, &LinkedProg);
1226 if (Error == PI_ERROR_OUT_OF_RESOURCES) {
1227 Context->getKernelProgramCache().reset();
1233 Program.reset(LinkedProg);
1234 if (Error != PI_SUCCESS) {
1241 Plugin->checkPiResult(Error);
1246 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1247 const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1248 Img.getAssertUsed();
1249 if (AssertUsedRange.isAvailable())
1250 for (
const auto &Prop : AssertUsedRange)
1251 m_KernelUsesAssert.insert(Prop->Name);
1255 return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1259 const bool DumpImages = std::getenv(
"SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1265 if (EntriesB == EntriesE)
1268 auto Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
1269 static uint32_t SequenceID = 0;
1273 Img->getKernelParamOptInfo();
1275 KernelNameToArgMaskMap &ArgMaskMap =
1276 m_EliminatedKernelArgMasks[Img.get()];
1277 for (
const auto &Info : KPOIRange)
1278 ArgMaskMap[Info->Name] =
1283 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1286 auto ExportedSymbols = Img->getExportedSymbols();
1288 m_ExportedSymbols.insert(ExportedSymbol->Name);
1292 m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1293 [&](
auto &CurrentImg) {
1294 return CurrentImg.first->getFormat() == Img->getFormat();
1296 dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1299 m_BinImg2KernelIDs[Img.get()].reset(
new std::vector<kernel_id>);
1308 if (std::strstr(EntriesIt->name,
"__sycl_service_kernel__")) {
1309 m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1316 if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1320 auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1321 if (It == m_KernelName2KernelIDs.end()) {
1322 std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1323 std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1325 detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1327 It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1329 m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1330 m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1333 cacheKernelUsesAssertInfo(*Img);
1338 m_AsanFoundInImage |=
1343 std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1348 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1350 auto DeviceGlobals = Img->getDeviceGlobals();
1361 auto [TypeSize, DeviceImageScopeDecorated] =
1362 DeviceGlobalInfo.
consume<std::uint32_t, std::uint32_t>();
1363 assert(DeviceGlobalInfo.
empty() &&
"Extra data left!");
1368 auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1369 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1371 ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1372 DeviceImageScopeDecorated);
1377 auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1378 DeviceGlobal->Name, Img.get(), TypeSize,
1379 DeviceImageScopeDecorated);
1380 m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1386 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1387 auto HostPipes = Img->getHostPipes();
1397 auto TypeSize = HostPipeInfo.
consume<std::uint32_t>();
1398 assert(HostPipeInfo.
empty() &&
"Extra data left!");
1400 auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1401 if (ExistingHostPipe != m_HostPipes.end()) {
1403 ExistingHostPipe->second->initialize(TypeSize);
1404 ExistingHostPipe->second->initialize(Img.get());
1410 std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1411 EntryUPtr->initialize(Img.get());
1412 m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1416 m_DeviceImages.insert(std::move(Img));
1421 for (
const auto &ImgIt : m_BinImg2KernelIDs) {
1422 ImgIt.first->print();
1427 uint32_t SequenceID)
const {
1428 const char *Prefix = std::getenv(
"SYCL_DUMP_IMAGES_PREFIX");
1429 std::string Fname(Prefix ? Prefix :
"sycl_");
1433 Fname +=
'_' + std::to_string(SequenceID);
1445 std::ofstream F(Fname, std::ios::binary);
1448 throw runtime_error(
"Can not write " + Fname, PI_ERROR_UNKNOWN);
1458 std::cerr <<
">>> ProgramManager::flushSpecConstants(" << Prg.
get()
1465 assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1466 NativePrg = NativePrg ? NativePrg : PrgHandle;
1471 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1472 auto It = NativePrograms.find(NativePrg);
1473 if (It == NativePrograms.end())
1475 sycl::errc::invalid,
1476 "spec constant is set in a program w/o a binary image");
1481 std::cerr <<
">>> ProgramManager::flushSpecConstants: binary image "
1482 << &Img->
getRawData() <<
" doesn't support spec constants\n";
1504 const std::string &KernelName) {
1506 if (m_EliminatedKernelArgMasks.empty())
1510 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1511 auto ImgIt = NativePrograms.find(NativePrg);
1512 if (ImgIt != NativePrograms.end()) {
1513 auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1514 if (MapIt != m_EliminatedKernelArgMasks.end()) {
1515 auto ArgMaskMapIt = MapIt->second.find(KernelName);
1516 if (ArgMaskMapIt != MapIt->second.end())
1517 return &MapIt->second[KernelName];
1525 for (
auto &Elem : m_EliminatedKernelArgMasks) {
1526 auto ArgMask = Elem.second.find(KernelName);
1527 if (ArgMask != Elem.second.end())
1528 return &ArgMask->second;
1536 auto IsAOTBinary = [](
const char *Format) {
1548 return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1553 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1555 auto &Plugin = DeviceImpl->getPlugin();
1567 PIDeviceHandle, &DevBin,
1569 if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1570 throw runtime_error(
"Invalid binary image or device",
1571 PI_ERROR_INVALID_VALUE);
1573 return (0 == SuitableImageID);
1577 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1579 auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1580 if (KernelID == m_KernelName2KernelIDs.end())
1581 throw runtime_error(
"No kernel found with the specified name",
1582 PI_ERROR_INVALID_KERNEL_NAME);
1584 return KernelID->second;
1588 std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1591 m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1593 std::shared_ptr<std::vector<kernel_id>>>
1594 Elem) { return compatibleWithDevice(Elem.first, Dev); });
1598 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1600 std::vector<sycl::kernel_id> AllKernelIDs;
1601 AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1602 for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1603 AllKernelIDs.push_back(KernelID.second);
1605 return AllKernelIDs;
1609 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1611 auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1612 if (KernelID == m_BuiltInKernelIDs.end()) {
1613 auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1614 auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1615 KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1618 return KernelID->second;
1622 const char *UniqueId) {
1623 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1625 auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1626 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1628 ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1629 m_Ptr2DeviceGlobal.insert(
1630 {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1635 std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1636 auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1637 m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1640 std::set<RTDeviceBinaryImage *>
1642 std::set<RTDeviceBinaryImage *> BinImages;
1643 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1644 for (
const kernel_id &KID : KernelIDs) {
1645 auto Range = m_KernelIDs2BinImage.equal_range(KID);
1646 for (
auto It = Range.first, End = Range.second; It != End; ++It)
1647 BinImages.insert(It->second);
1654 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1655 auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1656 assert(Entry != m_Ptr2DeviceGlobal.end() &&
"Device global entry not found");
1657 return Entry->second;
1661 const std::vector<std::string> &UniqueIds,
1662 bool ExcludeDeviceImageScopeDecorated) {
1663 std::vector<DeviceGlobalMapEntry *> FoundEntries;
1664 FoundEntries.reserve(UniqueIds.size());
1666 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1667 for (
const std::string &UniqueId : UniqueIds) {
1668 auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1669 assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1670 "Device global not found in map.");
1671 if (!ExcludeDeviceImageScopeDecorated ||
1672 !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1673 FoundEntries.push_back(DeviceGlobalEntry->second.get());
1675 return FoundEntries;
1679 const char *UniqueId) {
1680 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1682 auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1683 if (ExistingHostPipe != m_HostPipes.end()) {
1684 ExistingHostPipe->second->initialize(HostPipePtr);
1685 m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1689 auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1690 auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1691 m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1696 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1697 auto Entry = m_HostPipes.find(UniqueId);
1698 assert(Entry != m_HostPipes.end() &&
"Host pipe entry not found");
1699 return Entry->second.get();
1703 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1704 auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1705 assert(Entry != m_Ptr2HostPipe.end() &&
"Host pipe entry not found");
1706 return Entry->second;
1715 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1718 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1719 KernelIDs = m_BinImg2KernelIDs[BinImage];
1723 BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1726 return createSyclObjFromImpl<device_image_plain>(Impl);
1729 std::vector<device_image_plain>
1731 const context &Ctx,
const std::vector<device> &Devs,
1732 bundle_state TargetState,
const std::vector<kernel_id> &KernelIDs) {
1736 std::set<RTDeviceBinaryImage *> BinImages;
1737 if (!KernelIDs.empty()) {
1738 for (
const auto &KID : KernelIDs) {
1739 bool isCompatibleWithAtLeastOneDev =
1740 std::any_of(Devs.begin(), Devs.end(), [&KID](
const auto &Dev) {
1741 return sycl::is_compatible({KID}, Dev);
1743 if (!isCompatibleWithAtLeastOneDev)
1746 "Kernel is incompatible with all devices in devs");
1750 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1751 for (
auto &ImageUPtr : m_BinImg2KernelIDs) {
1752 BinImages.insert(ImageUPtr.first);
1765 for (
auto It = BinImages.begin(); It != BinImages.end();) {
1767 It = BinImages.erase(It);
1772 std::vector<device_image_plain> SYCLDeviceImages;
1780 struct DeviceBinaryImageInfo {
1781 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1783 int RequirementCounter = 0;
1785 std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1789 using StateImagesPairT =
1790 std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1791 using KernelImageMapT =
1792 std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1793 KernelImageMapT KernelImageMap;
1794 if (!KernelIDs.empty())
1795 for (
const kernel_id &KernelID : KernelIDs)
1796 KernelImageMap.insert({KernelID, {}});
1798 for (RTDeviceBinaryImage *BinImage : BinImages) {
1803 auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1804 DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1805 if (InsertRes.second) {
1809 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1810 ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1814 const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1816 int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1819 if (!ImageKernelIDs || ImageKernelIDs->empty())
1823 for (kernel_id &KernelID : *ImageKernelIDs) {
1824 StateImagesPairT *StateImagesPair;
1826 if (!KernelIDs.empty()) {
1827 auto It = KernelImageMap.find(KernelID);
1828 if (It == KernelImageMap.end())
1830 StateImagesPair = &It->second;
1832 StateImagesPair = &KernelImageMap[KernelID];
1834 auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1836 if (KernelImages.empty()) {
1837 KernelImagesState = ImgState;
1838 KernelImages.push_back(BinImage);
1839 ++ImgRequirementCounter;
1840 }
else if (KernelImagesState < ImgState) {
1841 for (RTDeviceBinaryImage *Img : KernelImages) {
1842 auto It = ImageInfoMap.find(Img);
1843 assert(It != ImageInfoMap.end());
1844 assert(It->second.RequirementCounter > 0);
1845 --(It->second.RequirementCounter);
1847 KernelImages.clear();
1848 KernelImages.push_back(BinImage);
1849 KernelImagesState = ImgState;
1850 ++ImgRequirementCounter;
1851 }
else if (KernelImagesState == ImgState) {
1852 KernelImages.push_back(BinImage);
1853 ++ImgRequirementCounter;
1859 for (
const auto &ImgInfoPair : ImageInfoMap) {
1860 if (ImgInfoPair.second.RequirementCounter == 0)
1864 ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1865 ImgInfoPair.second.KernelIDs,
nullptr);
1867 SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1870 return SYCLDeviceImages;
1873 void ProgramManager::bringSYCLDeviceImagesToState(
1874 std::vector<device_image_plain> &DeviceImages,
bundle_state TargetState) {
1883 switch (TargetState) {
1902 switch (DevImageState) {
1913 std::vector<device_image_plain> LinkedDevImages =
1918 assert(LinkedDevImages.size() == 1 &&
"Expected one linked image here");
1919 DevImage = LinkedDevImages[0];
1933 std::vector<device_image_plain>
1934 ProgramManager::getSYCLDeviceImages(
const context &Ctx,
1935 const std::vector<device> &Devs,
1938 std::vector<device_image_plain> DeviceImages =
1939 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1941 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1942 return DeviceImages;
1945 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1946 const context &Ctx,
const std::vector<device> &Devs,
1949 std::vector<device_image_plain> DeviceImages =
1950 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1953 auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1955 return !Selector(getSyclObjImpl(Image));
1957 DeviceImages.erase(It, DeviceImages.end());
1961 return DeviceImages;
1964 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1965 const context &Ctx,
const std::vector<device> &Devs,
1966 const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState) {
1968 if (KernelIDs.empty())
1972 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1974 for (
auto &It : m_BuiltInKernelIDs) {
1975 if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1978 "Attempting to use a built-in kernel. They are "
1979 "not fully supported");
1984 std::vector<device_image_plain> DeviceImages =
1985 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1988 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1989 return DeviceImages;
1999 std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
2000 const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
2001 &SpecConstData = InputImpl->get_spec_const_data_ref();
2002 const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
2005 for (
const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
2006 std::ignore = SpecConstNames;
2008 if (SpecIDDesc.IsSet) {
2010 Prog, SpecIDDesc.ID, SpecIDDesc.Size,
2011 SpecConsts.data() + SpecIDDesc.BlobOffset);
2019 const std::vector<device> &Devs,
2026 const std::shared_ptr<device_image_impl> &InputImpl =
2033 if (InputImpl->get_bin_image_ref()->getFormat() !=
2036 sycl::runtime_error(
2037 "Creating a program from AOT binary for multiple device is not "
2039 PI_ERROR_INVALID_OPERATION);
2044 *InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs[0]);
2046 if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2050 InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2052 InputImpl->get_spec_const_data_ref(),
2053 InputImpl->get_spec_const_blob_ref());
2055 std::vector<pi_device> PIDevices;
2056 PIDevices.reserve(Devs.size());
2057 for (
const device &Dev : Devs)
2061 std::string CompileOptions;
2064 CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2069 ObjectImpl->get_program_ref(), Devs.size(),
2070 PIDevices.data(), CompileOptions.c_str(),
2074 if (Error != PI_SUCCESS)
2077 getProgramBuildLog(ObjectImpl->get_program_ref(),
2080 return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2083 std::vector<device_image_plain>
2085 const std::vector<device> &Devs,
2089 std::vector<pi_program> PIPrograms;
2090 PIPrograms.push_back(
getSyclObjImpl(DeviceImage)->get_program_ref());
2092 std::vector<pi_device> PIDevices;
2093 PIDevices.reserve(Devs.size());
2094 for (
const device &Dev : Devs)
2097 std::string LinkOptionsStr;
2099 if (LinkOptionsStr.empty()) {
2100 const std::shared_ptr<device_image_impl> &InputImpl =
2103 *(InputImpl->get_bin_image_ref()));
2109 const PluginPtr &Plugin = ContextImpl->getPlugin();
2114 ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2115 LinkOptionsStr.c_str(), PIPrograms.size(),
2118 nullptr, &LinkedProg);
2121 if (Error == PI_ERROR_OUT_OF_RESOURCES) {
2122 ContextImpl->getKernelProgramCache().reset();
2126 if (Error != PI_SUCCESS) {
2128 const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2131 Plugin->reportPiError(Error,
"link()");
2134 std::shared_ptr<std::vector<kernel_id>> KernelIDs{
new std::vector<kernel_id>};
2135 std::vector<unsigned char> NewSpecConstBlob;
2138 std::shared_ptr<device_image_impl> DeviceImageImpl =
2142 KernelIDs->insert(KernelIDs->end(),
2143 DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2144 DeviceImageImpl->get_kernel_ids_ptr()->end());
2150 const std::lock_guard<std::mutex> SpecConstLock(
2151 DeviceImageImpl->get_spec_const_data_lock());
2155 for (
const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2156 std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2157 NewSpecConstMap[SpecConstIt.first];
2158 assert(NewDescEntries.empty() &&
2159 "Specialization constant already exists in the map.");
2160 NewDescEntries.reserve(SpecConstIt.second.size());
2162 SpecConstIt.second) {
2164 NewSpecConstDesc.
BlobOffset += NewSpecConstBlob.size();
2165 NewDescEntries.push_back(std::move(NewSpecConstDesc));
2171 NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2172 DeviceImageImpl->get_spec_const_blob_ref().begin(),
2173 DeviceImageImpl->get_spec_const_blob_ref().end());
2181 std::make_shared<detail::device_image_impl>(
2183 LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2187 return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2195 const std::vector<device> &Devs,
2199 const std::shared_ptr<device_image_impl> &InputImpl =
2202 const context Context = InputImpl->get_context();
2208 std::string CompileOpts;
2209 std::string LinkOpts;
2215 SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2218 auto BuildF = [
this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2219 &InputImpl, SpecConsts] {
2221 const PluginPtr &Plugin = ContextImpl->getPlugin();
2227 if (InputImpl->get_bin_image_ref()->getFormat() !=
2230 sycl::runtime_error(
2231 "Creating a program from AOT binary for multiple device is not "
2233 PI_ERROR_INVALID_OPERATION);
2237 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2238 Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2240 if (!DeviceCodeWasInCache &&
2241 InputImpl->get_bin_image_ref()->supportsSpecConstants())
2244 ProgramPtr ProgramManaged(
2245 NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
2252 uint32_t DeviceLibReqMask = 0;
2255 DeviceLibReqMask = getDeviceLibReqMask(Img);
2257 ProgramPtr BuiltProgram =
2258 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2264 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2265 NativePrograms[BuiltProgram.get()] = &Img;
2268 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2271 if (!DeviceCodeWasInCache)
2272 PersistentDeviceCodeCache::putItemToDisc(
2273 Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.
get());
2275 return BuiltProgram.release();
2279 auto ResProgram = BuildF();
2282 InputImpl->get_kernel_ids_ptr(), ResProgram,
2283 InputImpl->get_spec_const_data_ref(),
2284 InputImpl->get_spec_const_blob_ref());
2286 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2293 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
PiDevice);
2297 auto GetCachedBuildF = [&Cache, &CacheKey]() {
2303 Cache.
getOrBuild<compile_program_error>(GetCachedBuildF, BuildF);
2305 assert(BuildResult !=
nullptr &&
"Invalid build result");
2312 const PluginPtr &Plugin = ContextImpl->getPlugin();
2313 auto CacheOtherDevices = [ResProgram, &Plugin]() {
2320 for (
size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2325 CacheKey.second = PiDeviceAdd;
2326 Cache.
getOrBuild<compile_program_error>(GetCachedBuildF, CacheOtherDevices);
2328 assert(BuildResult !=
nullptr &&
"Invalid build result");
2338 InputImpl->get_kernel_ids_ptr(), ResProgram,
2339 InputImpl->get_spec_const_data_ref(),
2340 InputImpl->get_spec_const_blob_ref());
2342 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2347 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
2348 ProgramManager::getOrCreateKernel(
const context &Context,
2349 const std::string &KernelName,
2359 auto BuildF = [
this, &Program, &KernelName, &Ctx] {
2362 const PluginPtr &Plugin = Ctx->getPlugin();
2367 if (Ctx->getPlatformImpl()->supports_usm())
2374 getEliminatedKernelArgMask(Program, KernelName);
2379 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2387 auto [Kernel, ArgMask] = BuildF();
2392 Cache.
getOrBuild<invalid_object_error>(GetCachedBuildF, BuildF);
2394 assert(BuildResult !=
nullptr &&
"Invalid build result");
2401 &(BuildResult->MBuildResultMutex),
2402 BuildResult->Val.second);
2411 #define __SYCL_ASPECT(ASPECT, ID) \
2412 case aspect::ASPECT: \
2414 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2417 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2418 switch (AspectNum) {
2419 #include <sycl/info/aspects.def>
2420 #include <sycl/info/aspects_deprecated.def>
2424 std::to_string(
static_cast<unsigned>(AspectNum)));
2425 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2426 #undef __SYCL_ASPECT_DEPRECATED
2427 #undef __SYCL_ASPECT
2431 template <
typename T>
2432 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2442 namespace matrix_ext = ext::oneapi::experimental::matrix;
2447 std::optional<matrix_ext::matrix_type>
2449 const std::string &MatrixTypeString) {
2450 assert(!MatrixTypeString.empty() &&
2451 "MatrixTypeString type string can't be empty. Check if required "
2452 "template specialization for convertTypeToMatrixTypeString exists.");
2453 std::string_view MatrixTypeStringView = MatrixTypeString;
2454 std::string Prefix(
"matrix_type::");
2455 assert((MatrixTypeStringView.substr(0, Prefix.size()) == Prefix) &&
2456 "MatrixTypeString has incorrect prefix, should be \"matrix_type::\".");
2457 MatrixTypeStringView.remove_prefix(Prefix.size());
2458 if (
"bf16" == MatrixTypeStringView)
2459 return matrix_ext::matrix_type::bf16;
2460 else if (
"fp16" == MatrixTypeStringView)
2461 return matrix_ext::matrix_type::fp16;
2462 else if (
"tf32" == MatrixTypeStringView)
2463 return matrix_ext::matrix_type::tf32;
2464 else if (
"fp32" == MatrixTypeStringView)
2465 return matrix_ext::matrix_type::fp32;
2466 else if (
"fp64" == MatrixTypeStringView)
2467 return matrix_ext::matrix_type::fp64;
2468 else if (
"sint8" == MatrixTypeStringView)
2469 return matrix_ext::matrix_type::sint8;
2470 else if (
"sint16" == MatrixTypeStringView)
2471 return matrix_ext::matrix_type::sint16;
2472 else if (
"sint32" == MatrixTypeStringView)
2473 return matrix_ext::matrix_type::sint32;
2474 else if (
"sint64" == MatrixTypeStringView)
2475 return matrix_ext::matrix_type::sint64;
2476 else if (
"uint8" == MatrixTypeStringView)
2477 return matrix_ext::matrix_type::uint8;
2478 else if (
"uint16" == MatrixTypeStringView)
2479 return matrix_ext::matrix_type::uint16;
2480 else if (
"uint32" == MatrixTypeStringView)
2481 return matrix_ext::matrix_type::uint32;
2482 else if (
"uint64" == MatrixTypeStringView)
2483 return matrix_ext::matrix_type::uint64;
2484 return std::nullopt;
2488 size_t RowsUser,
size_t ColsUser,
2490 size_t MaxRowsRuntime,
size_t MaxColsRuntime,
2491 size_t RowsRuntime,
size_t ColsRuntime) {
2492 std::optional<matrix_ext::matrix_type> MatrixTypeUserOpt =
2494 if (!MatrixTypeUserOpt)
2496 bool IsMatrixTypeSupported = (MatrixTypeUserOpt.value() == MatrixTypeRuntime);
2497 bool IsRowsSupported = ((RowsRuntime != 0) ? (RowsUser == RowsRuntime)
2498 : (RowsUser <= MaxRowsRuntime));
2499 bool IsColsSupported = ((ColsRuntime != 0) ? (ColsUser == ColsRuntime)
2500 : (ColsUser <= MaxColsRuntime));
2501 return IsMatrixTypeSupported && IsRowsSupported && IsColsSupported;
2505 const std::string &JointMatrixProStr,
2506 const std::vector<ext::oneapi::experimental::matrix::combination>
2507 &SupportedMatrixCombinations) {
2508 std::istringstream JointMatrixStrStream(JointMatrixProStr);
2509 std::string SingleJointMatrix;
2513 while (std::getline(JointMatrixStrStream, SingleJointMatrix,
';')) {
2514 std::istringstream SingleJointMatrixStrStream(SingleJointMatrix);
2515 std::vector<std::string> JointMatrixVec;
2518 while (std::getline(SingleJointMatrixStrStream, Item,
',')) {
2519 JointMatrixVec.push_back(Item);
2522 assert(JointMatrixVec.size() == 4 &&
2523 "Property set is corrupted, it must have 4 elements.");
2525 const std::string &MatrixTypeUser = JointMatrixVec[0];
2526 const std::string &UseStrUser = JointMatrixVec[1];
2527 size_t RowsUser, ColsUser = 0;
2529 RowsUser = std::stoi(JointMatrixVec[2]);
2530 ColsUser = std::stoi(JointMatrixVec[3]);
2531 }
catch (std::logic_error &) {
2537 bool IsMatrixCompatible =
false;
2539 for (
const auto &Combination : SupportedMatrixCombinations) {
2540 std::optional<ext::oneapi::experimental::matrix::use> Use =
2542 assert(Use &&
"Property set has empty matrix::use value.");
2543 switch (Use.value()) {
2546 MatrixTypeUser, RowsUser, ColsUser, Combination.atype,
2547 Combination.max_msize, Combination.max_ksize, Combination.msize,
2552 MatrixTypeUser, RowsUser, ColsUser, Combination.btype,
2553 Combination.max_ksize, Combination.max_nsize, Combination.ksize,
2556 case matrix_ext::use::accumulator: {
2558 MatrixTypeUser, RowsUser, ColsUser, Combination.ctype,
2559 Combination.max_msize, Combination.max_nsize, Combination.msize,
2562 MatrixTypeUser, RowsUser, ColsUser, Combination.dtype,
2563 Combination.max_msize, Combination.max_nsize, Combination.msize,
2570 if (IsMatrixCompatible)
2574 if (!IsMatrixCompatible)
2576 "joint_matrix with parameters " + MatrixTypeUser +
2578 ", Rows=" + std::to_string(RowsUser) +
2579 ", Cols=" + std::to_string(ColsUser) +
2580 " is not supported on this device");
2582 return std::nullopt;
2586 const std::string &JointMatrixProStr,
2587 const std::vector<ext::oneapi::experimental::matrix::combination>
2588 &SupportedMatrixCombinations) {
2589 std::istringstream JointMatrixMadStrStream(JointMatrixProStr);
2590 std::string SingleJointMatrixMad;
2594 while (std::getline(JointMatrixMadStrStream, SingleJointMatrixMad,
';')) {
2595 std::istringstream SingleJointMatrixMadStrStream(SingleJointMatrixMad);
2596 std::vector<std::string> JointMatrixMadVec;
2599 while (std::getline(SingleJointMatrixMadStrStream, Item,
',')) {
2600 JointMatrixMadVec.push_back(Item);
2603 assert(JointMatrixMadVec.size() == 7 &&
2604 "Property set is corrupted, it must have 7 elements.");
2606 const std::string &MatrixTypeAStrUser = JointMatrixMadVec[0];
2607 const std::string &MatrixTypeBStrUser = JointMatrixMadVec[1];
2608 const std::string &MatrixTypeCStrUser = JointMatrixMadVec[2];
2609 const std::string &MatrixTypeDStrUser = JointMatrixMadVec[3];
2610 size_t MSizeUser, KSizeUser, NSizeUser = 0;
2612 MSizeUser = std::stoi(JointMatrixMadVec[4]);
2613 KSizeUser = std::stoi(JointMatrixMadVec[5]);
2614 NSizeUser = std::stoi(JointMatrixMadVec[6]);
2615 }
catch (std::logic_error &) {
2621 std::optional<matrix_ext::matrix_type> MatrixTypeAUserOpt =
2623 std::optional<matrix_ext::matrix_type> MatrixTypeBUserOpt =
2625 std::optional<matrix_ext::matrix_type> MatrixTypeCUserOpt =
2627 std::optional<matrix_ext::matrix_type> MatrixTypeDUserOpt =
2630 bool IsMatrixMadCompatible =
false;
2632 for (
const auto &Combination : SupportedMatrixCombinations) {
2633 if (!MatrixTypeAUserOpt || !MatrixTypeBUserOpt || !MatrixTypeCUserOpt ||
2634 !MatrixTypeDUserOpt)
2637 bool IsMatrixTypeACompatible =
2638 (MatrixTypeAUserOpt.value() == Combination.atype);
2639 bool IsMatrixTypeBCompatible =
2640 (MatrixTypeBUserOpt.value() == Combination.btype);
2641 bool IsMatrixTypeCCompatible =
2642 (MatrixTypeCUserOpt.value() == Combination.ctype);
2643 bool IsMatrixTypeDCompatible =
2644 (MatrixTypeDUserOpt.value() == Combination.dtype);
2645 bool IsMSizeCompatible =
2646 ((Combination.msize != 0) ? (MSizeUser == Combination.msize)
2647 : (MSizeUser <= Combination.max_msize));
2648 bool IsKSizeCompatible =
2649 ((Combination.ksize != 0) ? (KSizeUser == Combination.ksize)
2650 : (KSizeUser <= Combination.max_ksize));
2651 bool IsNSizeCompatible =
2652 ((Combination.nsize != 0) ? (NSizeUser == Combination.nsize)
2653 : (NSizeUser <= Combination.max_nsize));
2655 IsMatrixMadCompatible =
2656 IsMatrixTypeACompatible && IsMatrixTypeBCompatible &&
2657 IsMatrixTypeCCompatible && IsMatrixTypeDCompatible &&
2658 IsMSizeCompatible && IsKSizeCompatible && IsNSizeCompatible;
2661 if (IsMatrixMadCompatible)
2665 if (!IsMatrixMadCompatible)
2668 "joint_matrix_mad function with parameters atype=" +
2669 MatrixTypeAStrUser +
", btype=" + MatrixTypeBStrUser +
2670 ", ctype=" + MatrixTypeCStrUser +
", dtype=" +
2671 MatrixTypeDStrUser +
", M=" + std::to_string(MSizeUser) +
", K=" +
2672 std::to_string(KSizeUser) +
", N=" + std::to_string(NSizeUser) +
2673 " is not supported on this "
2676 return std::nullopt;
2679 std::optional<sycl::exception>
2683 auto getPropIt = [&Img](
const std::string &PropName) {
2687 PropRange.
begin(), PropRange.
end(),
2689 return (*Prop)->Name == PropName;
2691 return (PropIt == PropRange.
end())
2697 auto AspectsPropIt = getPropIt(
"aspects");
2698 auto JointMatrixPropIt = getPropIt(
"joint_matrix");
2699 auto JointMatrixMadPropIt = getPropIt(
"joint_matrix_mad");
2700 auto ReqdWGSizeUint32TPropIt = getPropIt(
"reqd_work_group_size");
2701 auto ReqdWGSizeUint64TPropIt = getPropIt(
"reqd_work_group_size_uint64_t");
2702 auto ReqdSubGroupSizePropIt = getPropIt(
"reqd_sub_group_size");
2705 if (AspectsPropIt) {
2710 while (!Aspects.
empty()) {
2711 aspect Aspect = Aspects.
consume<aspect>();
2712 if (!Dev.
has(Aspect))
2715 " is not supported on the device");
2719 if (JointMatrixPropIt) {
2720 std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
2722 ext::oneapi::experimental::info::device::matrix_combinations>();
2724 if (Combinations.empty())
2726 "no matrix hardware on the target device, "
2727 "joint_matrix is not supported");
2733 std::string JointMatrixByteArrayToStr;
2734 while (!JointMatrixByteArray.
empty()) {
2735 JointMatrixByteArrayToStr += JointMatrixByteArray.
consume<
char>();
2737 std::optional<sycl::exception> Result =
2740 return Result.value();
2743 if (JointMatrixMadPropIt) {
2744 std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
2746 ext::oneapi::experimental::info::device::matrix_combinations>();
2748 if (Combinations.empty())
2750 "no matrix hardware on the target device, "
2751 "joint_matrix_mad is not supported");
2757 std::string JointMatrixMadByteArrayToStr;
2758 while (!JointMatrixMadByteArray.
empty()) {
2759 JointMatrixMadByteArrayToStr += JointMatrixMadByteArray.
consume<
char>();
2762 JointMatrixMadByteArrayToStr, Combinations);
2764 return Result.value();
2768 if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2775 bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
2776 auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2781 uint64_t ReqdWGSizeAllDimsTotal = 1;
2782 std::vector<uint64_t> ReqdWGSizeVec;
2784 while (!ReqdWGSize.
empty()) {
2785 uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.
consume<uint64_t>()
2786 : ReqdWGSize.
consume<uint32_t>();
2789 ReqdWGSizeAllDimsTotal = *res;
2792 sycl::errc::kernel_not_supported,
2793 "Required work-group size is not supported"
2794 " (total number of work-items requested can't fit into size_t)");
2795 ReqdWGSizeVec.push_back(SingleDimSize);
2799 if (NDRDesc.
Dims != 0 && NDRDesc.
Dims !=
static_cast<size_t>(Dims))
2802 "The local size dimension of submitted nd_range doesn't match the "
2803 "required work-group size dimension");
2808 if (ReqdWGSizeAllDimsTotal >
2809 Dev.
get_info<info::device::max_work_group_size>())
2811 "Required work-group size " +
2812 std::to_string(ReqdWGSizeAllDimsTotal) +
2813 " is not supported on the device");
2816 std::variant<id<1>,
id<2>,
id<3>> MaxWorkItemSizesVariant;
2818 MaxWorkItemSizesVariant =
2821 MaxWorkItemSizesVariant =
2824 MaxWorkItemSizesVariant =
2826 for (
int i = 0; i < Dims; i++) {
2831 if (ReqdWGSizeVec[i] >
2834 "Required work-group size " +
2835 std::to_string(ReqdWGSizeVec[i]) +
2836 " is not supported");
2837 }
else if (Dims == 2) {
2838 if (ReqdWGSizeVec[i] >
2841 "Required work-group size " +
2842 std::to_string(ReqdWGSizeVec[i]) +
2843 " is not supported");
2845 if (ReqdWGSizeVec[i] >
2848 "Required work-group size " +
2849 std::to_string(ReqdWGSizeVec[i]) +
2850 " is not supported");
2855 if (ReqdSubGroupSizePropIt) {
2856 auto ReqdSubGroupSize =
2858 auto SupportedSubGroupSizes = Dev.
get_info<info::device::sub_group_sizes>();
2865 SupportedSubGroupSizes.cend(),
2866 [=](
auto s) { return s == ReqdSubGroupSize; }))
2869 std::to_string(ReqdSubGroupSize) +
2870 " is not supported on the device");
2881 sycl::detail::ProgramManager::getInstance().addImages(desc);
The context class represents a SYCL context on which kernel functions may be executed.
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
platform get_platform() const
Gets platform associated with this SYCL context.
void dropBytes(std::size_t Bytes)
ByteArray asByteArray() const
pi_uint32 asUint32() const
ProgramManager & getProgramManager()
static GlobalHandler & instance()
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(sycl::detail::pi::PiProgram Program, const std::string &KernelName)
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
std::pair< sycl::detail::pi::PiKernel, const KernelArgMask * > KernelArgMaskPairT
static constexpr const char * DirSep
static std::string getCurrentDSODir()
Returns an absolute path to a directory where the object was found.
static std::vector< std::vector< char > > getItemFromDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString)
static void putItemToDisc(const device &Device, const RTDeviceBinaryImage &Img, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, const sycl::detail::pi::PiProgram &NativePrg)
void debugPrintBinaryImages() const
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
void addImages(pi_device_binaries DeviceImages)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
bool kernelUsesAssert(const std::string &KernelName) const
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg=nullptr, const RTDeviceBinaryImage *Img=nullptr)
Resolves given program to a device binary image and requests the program to flush constants the image...
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
bool hasCompatibleImage(const device &Dev)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
ConstIterator end() const
ConstIterator begin() const
virtual void dump(std::ostream &Out) const
const pi_device_binary_struct & getRawData() const
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
const char * getLinkOptions() const
const PropertyRange & getProgramMetadata() const
std::uintptr_t getImageID() const
bool supportsSpecConstants() const
const PropertyRange & getDeviceRequirements() const
const char * getCompileOptions() const
virtual void print() const
pi_device_binary get() const
const PropertyRange & getDeviceLibReqMask() const
pi::PiDeviceBinaryType getFormat() const
Returns the format of the binary image.
static const char * get()
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
bool hasSetSpecConstants() const
Tells whether a specialization constant has been set for this program.
void flush_spec_constants(const RTDeviceBinaryImage &Img, sycl::detail::pi::PiProgram NativePrg=nullptr) const
Takes current values of specialization constants and "injects" them into the underlying native progra...
cl_program get() const
Returns a valid cl_program instance.
sycl::detail::pi::PiProgram & getHandleRef()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect)
Indicates if the SYCL device has the given feature.
A unique identifier of an item in an index space.
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
::pi_device_binary_type PiDeviceBinaryType
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
std::add_pointer_t< typename decltype(T::impl)::element_type > getRawSyclObjImpl(const T &SyclObject)
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, bool JITCompilationIsRequired)
static bool isDeviceBinaryTypeSupported(const context &C, sycl::detail::pi::PiDeviceBinaryType Format)
static constexpr int DbgProgMgr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::optional< matrix_ext::matrix_type > convertMatrixTypeStringMatrixTypeEnumValue(const std::string &MatrixTypeString)
static void enableITTAnnotationsIfNeeded(const sycl::detail::pi::PiProgram &Prog, const PluginPtr &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
std::vector< bool > KernelArgMask
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
static sycl::detail::pi::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const sycl::detail::pi::PiDevice &Device, bool UseNativeLib)
@ cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_cstring
@ cl_intel_devicelib_imf_bf16
@ cl_intel_devicelib_math
@ cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_assert
@ cl_intel_devicelib_imf_fp64
@ cl_intel_devicelib_bfloat16
@ cl_intel_devicelib_complex
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, const char *PropName)
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
static constexpr uint32_t ITTSpecConstId
KernelArgMask createKernelArgMask(const ByteArray &Bytes)
static std::string getAspectNameStr(sycl::aspect AspectNum)
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
static void appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
std::optional< sycl::exception > checkDevSupportJointMatrix(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
constexpr std::optional< ext::oneapi::experimental::matrix::use > convertMatrixUseStringToEnum(const char *UseString)
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const PluginPtr &)
RTDeviceBinaryImage * getBinImageFromMultiMap(const std::unordered_multimap< StorageKey, RTDeviceBinaryImage * > &ImagesSet, const StorageKey &Key, const context &Context, const device &Device)
std::shared_ptr< plugin > PluginPtr
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, sycl::detail::pi::PiProgram Prog, const PluginPtr &Plugin)
std::shared_ptr< device_impl > DeviceImplPtr
std::optional< sycl::exception > checkDevSupportJointMatrixMad(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const PluginPtr &Plugin)
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser, size_t RowsUser, size_t ColsUser, matrix_ext::matrix_type MatrixTypeRuntime, size_t MaxRowsRuntime, size_t MaxColsRuntime, size_t RowsRuntime, size_t ColsRuntime)
constexpr tuple< Ts... > make_tuple(Ts... Args)
static sycl::detail::pi::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
static std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > multiply_with_overflow_check(T x, T y)
static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts)
static void emitBuiltProgramInfo(const pi_program &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, sycl::detail::pi::PiProgram &Prog)
static const char * getFormatStr(sycl::detail::pi::PiDeviceBinaryType Format)
static void appendCompileEnvironmentVariablesThatAppend(std::string &CompileOpts)
std::vector< unsigned char > SerializedObj
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
static std::vector< sycl::detail::pi::PiProgram > getDeviceLibPrograms(const ContextImplPtr Context, const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask)
static sycl::detail::pi::PiProgram createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< pi_device_binary_property > Metadata)
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel)
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
@ PI_DEVICE_INFO_BUILD_ON_SUBDEVICE
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE
pi_result piProgramRetain(pi_program program)
@ PI_USM_INDIRECT_ACCESS
indicates that the kernel might access data through USM ptrs
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
pi_result piKernelRetain(pi_kernel kernel)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
pi_result piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, size_t spec_size, const void *spec_value)
Sets a specialization constant to a specific value.
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, size_t param_value_size, const void *param_value)
API to set attributes controlling kernel execution.
pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV
pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, const pi_program *input_programs, void(*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program)
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN
pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program)
pi_result piProgramCompile(pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, void(*pfn_notify)(pi_program program, void *user_data), void *user_data)
@ PI_CONTEXT_INFO_NUM_DEVICES
pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, pi_int32 *binary_status, pi_program *ret_program)
Creates a PI program for a context and loads the given binary into it.
@ PI_PROGRAM_INFO_DEVICES
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, pi_uint32 num_binaries, pi_uint32 *selected_binary_ind)
Selects the most appropriate device binary based on runtime information and the IR characteristics.
static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA
@ PI_PROGRAM_BUILD_INFO_LOG
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
This struct is a record of all the device code that may be offloaded.
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
pi_device_binary DeviceBinaries
Device binaries data.
This struct is a record of the device binary information.
_pi_offload_entry EntriesEnd
_pi_offload_entry EntriesBegin
the offload entry table
const unsigned char * BinaryEnd
Pointer to the target code end.
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
const unsigned char * BinaryStart
Pointer to the target code start.