46 inline namespace _V1 {
49 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
53 static constexpr
char UseSpvEnv[](
"SYCL_USE_KERNEL_SPV");
61 constexpr
char SpecValue = 1;
73 const unsigned char *Data,
size_t DataLen,
74 const std::vector<pi_device_binary_property> Metadata) {
75 const PluginPtr &Plugin = Context->getPlugin();
80 sizeof(NumDevices), &NumDevices,
82 assert(NumDevices > 0 &&
83 "Only a single device is supported for AOT compilation");
91 Context->getHandleRef(), 1 , &
PiDevice, &DataLen, &Data,
92 Metadata.size(), Metadata.data(), &BinaryStatus, &Program);
94 if (BinaryStatus != CL_SUCCESS) {
95 throw runtime_error(
"Creating program with binary failed.", BinaryStatus);
105 const PluginPtr &Plugin = Context->getPlugin();
130 template <
typename RetT,
typename ExceptionT,
typename GetCachedBuildFT,
132 KernelProgramCache::BuildResult<RetT> *
137 auto [BuildResult, InsertionTookPlace] = GetCachedBuild();
141 if (!InsertionTookPlace) {
150 BuildState Expected = BuildState::BS_Failed;
151 BuildState Desired = BuildState::BS_InProgress;
153 if (BuildResult->State.compare_exchange_strong(Expected, Desired))
160 BuildResult->Val = Build();
161 RetT *Desired = &BuildResult->Val;
164 RetT *Expected =
nullptr;
166 if (!BuildResult->Ptr.compare_exchange_strong(Expected, Desired))
168 assert(
false &&
"We've build an entity that is already have been built.");
170 BuildResult->Ptr.store(Desired);
176 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
177 BuildResult->State.store(BuildState::BS_Done);
184 BuildResult->Error.Msg = Ex.
what();
188 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
189 BuildResult->State.store(BuildState::BS_Failed);
194 std::rethrow_exception(std::current_exception());
197 std::lock_guard<std::mutex> Lock(BuildResult->MBuildResultMutex);
198 BuildResult->State.store(BuildState::BS_Failed);
203 std::rethrow_exception(std::current_exception());
224 for (
const device &D : Devices) {
225 if (!D.get_info<info::device::is_compiler_available>())
232 if (ver.find(
"OpenCL 1.0") == std::string::npos &&
233 ver.find(
"OpenCL 1.1") == std::string::npos &&
234 ver.find(
"OpenCL 1.2") == std::string::npos &&
235 ver.find(
"OpenCL 2.0") == std::string::npos)
239 for (
const device &D : Devices) {
242 std::vector<std::string> Extensions =
243 D.get_info<info::device::extensions>();
244 if (Extensions.end() ==
245 std::find(Extensions.begin(), Extensions.end(),
"cl_khr_il_program"))
263 assert(
false &&
"Unknown device image format");
271 std::cerr <<
">>> ProgramManager::createPIProgram(" << &Img <<
", "
278 throw runtime_error(
"Malformed device program image descriptor",
279 PI_ERROR_INVALID_VALUE);
282 throw runtime_error(
"Invalid device program image: size is zero",
283 PI_ERROR_INVALID_VALUE);
285 size_t ImgSize = Img.
getSize();
302 "SPIR-V online compilation is not supported in this context",
303 PI_ERROR_INVALID_OPERATION);
307 std::vector<pi_device_binary_property> ProgMetadataVector{
308 ProgMetadata.
begin(), ProgMetadata.end()};
319 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
321 NativePrograms[Res] = &Img;
324 Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
339 if (TemporaryStr !=
nullptr) {
340 if (!LinkOpts.empty())
342 LinkOpts += std::string(TemporaryStr);
348 const char *PropName) {
354 const char *PropName) {
356 std::stringstream ss;
360 if (optLevel < 0 || optLevel > 3)
362 ss <<
"-O" << optLevel;
363 std::string temp = ss.str();
377 if (!RegAllocModeProp && !GRFSizeProp)
381 assert(!RegAllocModeProp || !GRFSizeProp);
382 bool IsLargeGRF =
false;
383 bool IsAutoGRF =
false;
384 if (RegAllocModeProp) {
385 uint32_t RegAllocModePropVal =
387 IsLargeGRF = RegAllocModePropVal ==
389 IsAutoGRF = RegAllocModePropVal ==
394 IsLargeGRF = GRFSizePropVal == 256;
395 IsAutoGRF = GRFSizePropVal == 0;
398 if (!CompileOpts.empty())
401 CompileOpts += IsEsimdImage ?
"-doubleGRF" :
"-ze-opt-large-register-file";
404 if (IsAutoGRF && !IsEsimdImage) {
405 if (!CompileOpts.empty())
408 CompileOpts +=
"-ze-intel-enable-auto-large-GRF-mode";
414 const std::vector<device> &Devs,
419 static const char *CompileOptsEnv =
423 if (!CompileOptsEnv) {
424 if (!CompileOpts.empty())
427 if (TemporaryStr !=
nullptr)
428 CompileOpts += std::string(TemporaryStr);
434 if (!CompileOpts.empty())
436 CompileOpts +=
"-vc-codegen";
440 CompileOpts +=
" -disable-finalizer-msg";
449 const char *optLevelStr = str.c_str();
454 if (!isEsimdImage && !CompileOptsEnv && optLevelStr !=
nullptr &&
455 optLevelStr[0] !=
'\0') {
457 assert(!Devs.empty() &&
459 return Dev.get_platform() == Devs[0].get_platform();
461 const char *backend_option =
nullptr;
464 PlatformImpl->getBackendOption(optLevelStr, &backend_option);
465 if (backend_option && backend_option[0] !=
'\0') {
466 if (!CompileOpts.empty())
468 CompileOpts += std::string(backend_option);
475 return Dev.is_gpu() &&
476 Dev.get_info<info::device::vendor_id>() == 0x8086;
481 if (!CompileOpts.empty())
483 CompileOpts +=
"-ze-take-global-address";
485 if (!CompileOptsEnv) {
486 static const char *TargetCompileFast =
"-ftarget-compile-fast";
487 if (
auto Pos = CompileOpts.find(TargetCompileFast);
488 Pos != std::string::npos) {
489 const char *BackendOption =
nullptr;
491 PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
492 auto OptLen = strlen(TargetCompileFast);
493 if (IsIntelGPU && BackendOption && BackendOption[0] !=
'\0')
494 CompileOpts.replace(Pos, OptLen, BackendOption);
496 CompileOpts.erase(Pos, OptLen);
502 std::string &LinkOpts,
504 const std::vector<device> &Devices,
513 static const char *CompileOptsEnv =
515 if (CompileOptsEnv) {
516 CompileOpts = CompileOptsEnv;
525 LinkOpts = LinkOptsEnv;
530 std::string &LinkOpts) {
536 std::pair<sycl::detail::pi::PiProgram, bool>
540 const std::string &CompileAndLinkOptions,
545 Device, Img, SpecConsts, CompileAndLinkOptions);
546 if (BinProg.size()) {
549 std::vector<pi_device_binary_property> ProgMetadataVector{
550 ProgMetadata.
begin(), ProgMetadata.end()};
554 (
const unsigned char *)BinProg[0].data(),
555 BinProg[0].size(), ProgMetadataVector);
559 return {NativePrg, BinProg.size()};
567 std::string ProgramBuildLog =
569 std::clog << ProgramBuildLog << std::endl;
576 bool JITCompilationIsRequired) {
579 std::string CompileOpts;
580 std::string LinkOpts;
594 while (!RootDevImpl->isRootDevice()) {
596 RootDevImpl->get_info<info::device::parent_device>());
598 if (!ContextImpl->hasDevice(ParentDev))
600 RootDevImpl = ParentDev;
606 sizeof(
pi_bool), &MustBuildOnSubdevice,
nullptr);
609 (MustBuildOnSubdevice ==
PI_TRUE) ? DeviceImpl : RootDevImpl;
610 auto Context = createSyclObjFromImpl<context>(ContextImpl);
611 auto Device = createSyclObjFromImpl<device>(Dev);
613 getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
619 auto BuildF = [
this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
620 &LinkOpts, SpecConsts] {
621 const PluginPtr &Plugin = ContextImpl->getPlugin();
625 Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);
627 if (!DeviceCodeWasInCache) {
634 ProgramPtr ProgramManaged(
635 NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
643 uint32_t DeviceLibReqMask = 0;
644 if (!DeviceCodeWasInCache &&
649 ProgramPtr BuiltProgram =
650 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
656 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
657 NativePrograms[BuiltProgram.get()] = &Img;
660 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
663 if (!DeviceCodeWasInCache)
665 Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.
get());
666 return BuiltProgram.release();
672 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
673 std::make_pair(
PiDevice, CompileOpts + LinkOpts));
675 auto GetCachedBuildF = [&Cache, &CacheKey]() {
680 getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
681 Cache, GetCachedBuildF, BuildF);
683 assert(BuildResult !=
nullptr &&
"Invalid build result");
684 return *BuildResult->Ptr.load();
691 const std::string &KernelName,
694 std::cerr <<
">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
695 <<
", " << DeviceImpl.get() <<
", " << KernelName <<
")\n";
702 std::string CompileOpts, LinkOpts;
712 CompileOpts + LinkOpts, KernelName);
714 if (std::get<0>(ret_tuple))
720 auto BuildF = [
this, &Program, &KernelName, &ContextImpl] {
723 const PluginPtr &Plugin = ContextImpl->getPlugin();
725 Program, KernelName.c_str(), &Kernel);
735 return std::make_pair(Kernel, ArgMask);
738 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
742 auto BuildResult = getOrBuild<KernelArgMaskPairT, invalid_object_error>(
743 Cache, GetCachedBuildF, BuildF);
745 assert(BuildResult !=
nullptr &&
"Invalid build result");
746 const KernelArgMaskPairT &KernelArgMaskPair = *BuildResult->Ptr.load();
748 &(BuildResult->MBuildResultMutex),
749 KernelArgMaskPair.second, Program);
758 const PluginPtr &Plugin = Context->getPlugin();
768 size_t PIDevicesSize = 0;
769 const PluginPtr &Plugin = Context->getPlugin();
771 nullptr, &PIDevicesSize);
772 std::vector<sycl::detail::pi::PiDevice> PIDevices(
775 PIDevicesSize, PIDevices.data(),
777 std::string Log =
"The program was built for " +
778 std::to_string(PIDevices.size()) +
" devices";
780 std::string DeviceBuildInfoString;
781 size_t DeviceBuildInfoStrSize = 0;
784 &DeviceBuildInfoStrSize);
785 if (DeviceBuildInfoStrSize > 0) {
786 std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
789 DeviceBuildInfo.data(),
nullptr);
790 DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
793 std::string DeviceNameString;
794 size_t DeviceNameStrSize = 0;
796 nullptr, &DeviceNameStrSize);
797 if (DeviceNameStrSize > 0) {
798 std::vector<char> DeviceName(DeviceNameStrSize);
801 DeviceName.data(),
nullptr);
802 DeviceNameString = std::string(DeviceName.data());
804 Log +=
"\nBuild program log for '" + DeviceNameString +
"':\n" +
805 DeviceBuildInfoString;
817 std::ifstream::in | std::ifstream::binary);
822 File.seekg(0, std::ios::end);
823 size_t FileSize = File.tellg();
824 File.seekg(0, std::ios::beg);
825 std::vector<char> FileContent(FileSize);
826 File.read(&FileContent[0], FileSize);
831 return Prog !=
nullptr;
836 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
839 {
nullptr,
"libsycl-fallback-cassert.spv"}},
841 {
nullptr,
"libsycl-fallback-cmath.spv"}},
843 {
nullptr,
"libsycl-fallback-cmath-fp64.spv"}},
845 {
nullptr,
"libsycl-fallback-complex.spv"}},
847 {
nullptr,
"libsycl-fallback-complex-fp64.spv"}},
849 {
nullptr,
"libsycl-fallback-cstring.spv"}},
851 {
nullptr,
"libsycl-fallback-imf.spv"}},
853 {
nullptr,
"libsycl-fallback-imf-fp64.spv"}},
855 {
nullptr,
"libsycl-fallback-imf-bf16.spv"}},
857 {
"libsycl-native-bfloat16.spv",
"libsycl-fallback-bfloat16.spv"}}};
861 const char *Lib =
nullptr;
863 Lib = Native ? LibPair->second.first : LibPair->second.second;
865 throw compile_program_error(
"Unhandled (new?) device library extension",
866 PI_ERROR_INVALID_OPERATION);
877 "cl_intel_devicelib_math_fp64"},
880 "cl_intel_devicelib_complex_fp64"},
886 "cl_intel_bfloat16_conversions"}};
891 throw compile_program_error(
"Unhandled (new?) device library extension",
892 PI_ERROR_INVALID_OPERATION);
903 auto LockedCache = Context->acquireCachedLibPrograms();
904 auto CachedLibPrograms = LockedCache.get();
905 auto CacheResult = CachedLibPrograms.emplace(
906 std::make_pair(std::make_pair(Extension, Device),
nullptr));
907 bool Cached = !CacheResult.second;
908 auto LibProgIt = CacheResult.first;
915 CachedLibPrograms.erase(LibProgIt);
916 throw compile_program_error(std::string(
"Failed to load ") + LibFileName,
917 PI_ERROR_INVALID_VALUE);
920 const PluginPtr &Plugin = Context->getPlugin();
930 "", 0,
nullptr,
nullptr,
nullptr,
nullptr);
931 if (Error != PI_SUCCESS) {
932 CachedLibPrograms.erase(LibProgIt);
933 throw compile_program_error(
941 const char *SpvFile = std::getenv(
UseSpvEnv);
948 std::ifstream File(SpvFile, std::ios::binary);
951 throw runtime_error(std::string(
"Can't open file specified via ") +
953 PI_ERROR_INVALID_VALUE);
954 File.seekg(0, std::ios::end);
955 size_t Size = File.tellg();
956 std::unique_ptr<char[]> Data(
new char[Size]);
958 File.read(Data.get(), Size);
961 throw runtime_error(std::string(
"read from ") + SpvFile +
962 std::string(
" failed"),
963 PI_ERROR_INVALID_VALUE);
968 make_unique_ptr<DynRTDeviceBinaryImage>(std::move(Data), Size);
971 std::cerr <<
"loaded device image binary from " << SpvFile <<
"\n";
979 bool JITCompilationIsRequired) {
980 if (!JITCompilationIsRequired)
991 PI_ERROR_INVALID_OPERATION);
995 template <
typename StorageKey>
997 const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
998 const StorageKey &Key,
const context &Context,
const device &Device) {
999 auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
1000 if (ItBegin == ItEnd)
1003 std::vector<pi_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
1005 for (
unsigned I = 0; It != ItEnd; ++It, ++I)
1016 std::advance(ItBegin, ImgInd);
1017 return ItBegin->second;
1020 RTDeviceBinaryImage &
1023 bool JITCompilationIsRequired) {
1025 std::cerr <<
">>> ProgramManager::getDeviceImage(\"" << KernelName <<
"\", "
1027 <<
", " << JITCompilationIsRequired <<
")\n";
1029 std::cerr <<
"available device images:\n";
1034 assert(m_SpvFileImage);
1036 std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1037 Context, Device, JITCompilationIsRequired);
1042 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1043 if (
auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1044 KernelId != m_KernelName2KernelIDs.end()) {
1048 assert(Img &&
"No binary image found for kernel id");
1064 throw runtime_error(
"No kernel named " + KernelName +
" was found",
1065 PI_ERROR_INVALID_KERNEL_NAME);
1069 const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1071 bool JITCompilationIsRequired) {
1072 assert(ImageSet.size() > 0);
1075 std::cerr <<
">>> ProgramManager::getDeviceImage(Custom SPV file "
1077 <<
", " << JITCompilationIsRequired <<
")\n";
1079 std::cerr <<
"available device images:\n";
1083 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1084 std::vector<pi_device_binary> RawImgs(ImageSet.size());
1085 auto ImageIterator = ImageSet.begin();
1086 for (
size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1087 RawImgs[i] =
const_cast<pi_device_binary>(&(*ImageIterator)->getRawData());
1097 ImageIterator = ImageSet.begin();
1098 std::advance(ImageIterator, ImgInd);
1103 std::cerr <<
"selected device image: " << &(*ImageIterator)->getRawData()
1105 (*ImageIterator)->print();
1107 return **ImageIterator;
1112 0x1 << (
static_cast<uint32_t
>(Ext) -
1114 return ((DeviceLibReqMask & Mask) == Mask);
1117 static std::vector<sycl::detail::pi::PiProgram>
1120 uint32_t DeviceLibReqMask) {
1121 std::vector<sycl::detail::pi::PiProgram> Programs;
1123 std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1138 std::string DevExtList =
1139 Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1141 const bool fp64Support = (DevExtList.npos != DevExtList.find(
"cl_khr_fp64"));
1145 for (
auto &Pair : RequiredDeviceLibExt) {
1147 bool &FallbackIsLoaded = Pair.second;
1149 if (FallbackIsLoaded) {
1166 bool InhibitNativeImpl =
false;
1167 if (
const char *Env = getenv(
"SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1168 InhibitNativeImpl = strstr(Env, ExtName) !=
nullptr;
1171 bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1172 if (!DeviceSupports || InhibitNativeImpl) {
1175 FallbackIsLoaded =
true;
1181 FallbackIsLoaded =
true;
1190 const std::string &CompileOptions,
const std::string &LinkOptions,
1194 std::cerr <<
">>> ProgramManager::build(" << Program.get() <<
", "
1195 << CompileOptions <<
", " << LinkOptions <<
", ... " << Device
1208 DeviceLibReqMask &= 0xFFFFFFFE;
1209 bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1214 if (CompileOptions.find(std::string(
"-cmc")) != std::string::npos ||
1215 CompileOptions.find(std::string(
"-vc-codegen")) != std::string::npos)
1216 LinkDeviceLibs =
false;
1218 std::vector<sycl::detail::pi::PiProgram> LinkPrograms;
1219 if (LinkDeviceLibs) {
1223 static const char *ForceLinkEnv = std::getenv(
"SYCL_FORCE_LINK");
1224 static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv ==
'1');
1226 const PluginPtr &Plugin = Context->getPlugin();
1227 if (LinkPrograms.empty() && !ForceLink) {
1228 const std::string &Options = LinkOptions.empty()
1230 : (CompileOptions +
" " + LinkOptions);
1233 Program.get(), 1, &Device, Options.c_str(),
1235 if (Error != PI_SUCCESS)
1243 &Device, CompileOptions.c_str(), 0,
1244 nullptr,
nullptr,
nullptr,
nullptr);
1245 LinkPrograms.push_back(Program.get());
1250 Context->getHandleRef(), 1, &Device,
1251 LinkOptions.c_str(), LinkPrograms.size(), LinkPrograms.data(),
1252 nullptr,
nullptr, &LinkedProg);
1256 Program.reset(LinkedProg);
1257 if (Error != PI_SUCCESS) {
1264 Plugin->checkPiResult(Error);
1269 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1270 const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1271 Img.getAssertUsed();
1272 if (AssertUsedRange.isAvailable())
1273 for (
const auto &Prop : AssertUsedRange)
1274 m_KernelUsesAssert.insert(Prop->Name);
1278 return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1282 const bool DumpImages = std::getenv(
"SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1288 if (EntriesB == EntriesE)
1291 auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg);
1292 static uint32_t SequenceID = 0;
1296 Img->getKernelParamOptInfo();
1298 KernelNameToArgMaskMap &ArgMaskMap =
1299 m_EliminatedKernelArgMasks[Img.get()];
1300 for (
const auto &Info : KPOIRange)
1301 ArgMaskMap[Info->Name] =
1306 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1309 auto ExportedSymbols = Img->getExportedSymbols();
1311 m_ExportedSymbols.insert(ExportedSymbol->Name);
1315 m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1316 [&](
auto &CurrentImg) {
1317 return CurrentImg.first->getFormat() == Img->getFormat();
1319 dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1322 m_BinImg2KernelIDs[Img.get()].reset(
new std::vector<kernel_id>);
1331 if (std::strstr(EntriesIt->name,
"__sycl_service_kernel__")) {
1332 m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1339 if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end())
1343 auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1344 if (It == m_KernelName2KernelIDs.end()) {
1345 std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1346 std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1348 detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1350 It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1352 m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1353 m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1356 cacheKernelUsesAssertInfo(*Img);
1359 std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1364 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1366 auto DeviceGlobals = Img->getDeviceGlobals();
1377 auto [TypeSize, DeviceImageScopeDecorated] =
1378 DeviceGlobalInfo.
consume<std::uint32_t, std::uint32_t>();
1379 assert(DeviceGlobalInfo.
empty() &&
"Extra data left!");
1384 auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1385 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1387 ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1388 DeviceImageScopeDecorated);
1393 auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1394 DeviceGlobal->Name, Img.get(), TypeSize,
1395 DeviceImageScopeDecorated);
1396 m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1402 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1403 auto HostPipes = Img->getHostPipes();
1413 auto TypeSize = HostPipeInfo.
consume<std::uint32_t>();
1414 assert(HostPipeInfo.
empty() &&
"Extra data left!");
1416 auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1417 if (ExistingHostPipe != m_HostPipes.end()) {
1419 ExistingHostPipe->second->initialize(TypeSize);
1420 ExistingHostPipe->second->initialize(Img.get());
1426 std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1427 EntryUPtr->initialize(Img.get());
1428 m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1432 m_DeviceImages.insert(std::move(Img));
1437 for (
const auto &ImgIt : m_BinImg2KernelIDs) {
1438 ImgIt.first->print();
1443 uint32_t SequenceID)
const {
1444 const char *Prefix = std::getenv(
"SYCL_DUMP_IMAGES_PREFIX");
1445 std::string Fname(Prefix ? Prefix :
"sycl_");
1449 Fname +=
'_' + std::to_string(SequenceID);
1461 std::ofstream F(Fname, std::ios::binary);
1464 throw runtime_error(
"Can not write " + Fname, PI_ERROR_UNKNOWN);
1474 std::cerr <<
">>> ProgramManager::flushSpecConstants(" << Prg.
get()
1481 assert(!NativePrg || !PrgHandle || (NativePrg == PrgHandle));
1482 NativePrg = NativePrg ? NativePrg : PrgHandle;
1487 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1488 auto It = NativePrograms.find(NativePrg);
1489 if (It == NativePrograms.end())
1491 sycl::errc::invalid,
1492 "spec constant is set in a program w/o a binary image");
1497 std::cerr <<
">>> ProgramManager::flushSpecConstants: binary image "
1498 << &Img->
getRawData() <<
" doesn't support spec constants\n";
1523 const std::string &KernelName) {
1525 if (m_EliminatedKernelArgMasks.empty())
1529 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1530 auto ImgIt = NativePrograms.find(NativePrg);
1531 if (ImgIt != NativePrograms.end()) {
1532 auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1533 if (MapIt != m_EliminatedKernelArgMasks.end()) {
1534 auto ArgMaskMapIt = MapIt->second.find(KernelName);
1535 if (ArgMaskMapIt != MapIt->second.end())
1536 return &MapIt->second[KernelName];
1544 for (
auto &Elem : m_EliminatedKernelArgMasks) {
1545 auto ArgMask = Elem.second.find(KernelName);
1546 if (ArgMask != Elem.second.end())
1547 return &ArgMask->second;
1555 auto IsAOTBinary = [](
const char *Format) {
1567 return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1572 const std::shared_ptr<detail::device_impl> &DeviceImpl =
1574 auto &Plugin = DeviceImpl->getPlugin();
1586 PIDeviceHandle, &DevBin,
1588 if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY)
1589 throw runtime_error(
"Invalid binary image or device",
1590 PI_ERROR_INVALID_VALUE);
1592 return (0 == SuitableImageID);
1596 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1598 auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1599 if (KernelID == m_KernelName2KernelIDs.end())
1600 throw runtime_error(
"No kernel found with the specified name",
1601 PI_ERROR_INVALID_KERNEL_NAME);
1603 return KernelID->second;
1607 std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1610 m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1612 std::shared_ptr<std::vector<kernel_id>>>
1613 Elem) { return compatibleWithDevice(Elem.first, Dev); });
1617 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1619 std::vector<sycl::kernel_id> AllKernelIDs;
1620 AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1621 for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1622 AllKernelIDs.push_back(KernelID.second);
1624 return AllKernelIDs;
1628 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1630 auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1631 if (KernelID == m_BuiltInKernelIDs.end()) {
1632 auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1633 auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1634 KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1637 return KernelID->second;
1641 const char *UniqueId) {
1642 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1644 auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1645 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1647 ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1648 m_Ptr2DeviceGlobal.insert(
1649 {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1654 std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1655 auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1656 m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1659 std::set<RTDeviceBinaryImage *>
1661 std::set<RTDeviceBinaryImage *> BinImages;
1662 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1663 for (
const kernel_id &KID : KernelIDs) {
1664 auto Range = m_KernelIDs2BinImage.equal_range(KID);
1665 for (
auto It = Range.first, End = Range.second; It != End; ++It)
1666 BinImages.insert(It->second);
1673 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1674 auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1675 assert(Entry != m_Ptr2DeviceGlobal.end() &&
"Device global entry not found");
1676 return Entry->second;
1680 const std::vector<std::string> &UniqueIds,
1681 bool ExcludeDeviceImageScopeDecorated) {
1682 std::vector<DeviceGlobalMapEntry *> FoundEntries;
1683 FoundEntries.reserve(UniqueIds.size());
1685 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1686 for (
const std::string &UniqueId : UniqueIds) {
1687 auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1688 assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1689 "Device global not found in map.");
1690 if (!ExcludeDeviceImageScopeDecorated ||
1691 !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1692 FoundEntries.push_back(DeviceGlobalEntry->second.get());
1694 return FoundEntries;
1698 const char *UniqueId) {
1699 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1701 auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1702 if (ExistingHostPipe != m_HostPipes.end()) {
1703 ExistingHostPipe->second->initialize(HostPipePtr);
1704 m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1708 auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1709 auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1710 m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1715 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1716 auto Entry = m_HostPipes.find(UniqueId);
1717 assert(Entry != m_HostPipes.end() &&
"Host pipe entry not found");
1718 return Entry->second.get();
1722 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1723 auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1724 assert(Entry != m_Ptr2HostPipe.end() &&
"Host pipe entry not found");
1725 return Entry->second;
1734 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1737 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1738 KernelIDs = m_BinImg2KernelIDs[BinImage];
1742 BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1745 return createSyclObjFromImpl<device_image_plain>(Impl);
1748 std::vector<device_image_plain>
1750 const context &Ctx,
const std::vector<device> &Devs,
1751 bundle_state TargetState,
const std::vector<kernel_id> &KernelIDs) {
1755 std::set<RTDeviceBinaryImage *> BinImages;
1756 if (!KernelIDs.empty()) {
1757 for (
const auto &KID : KernelIDs) {
1758 bool isCompatibleWithAtLeastOneDev =
1759 std::any_of(Devs.begin(), Devs.end(), [&KID](
const auto &Dev) {
1760 return sycl::is_compatible({KID}, Dev);
1762 if (!isCompatibleWithAtLeastOneDev)
1765 "Kernel is incompatible with all devices in devs");
1769 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1770 for (
auto &ImageUPtr : m_BinImg2KernelIDs) {
1771 BinImages.insert(ImageUPtr.first);
1784 for (
auto It = BinImages.begin(); It != BinImages.end();) {
1786 It = BinImages.erase(It);
1791 std::vector<device_image_plain> SYCLDeviceImages;
1799 struct DeviceBinaryImageInfo {
1800 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1802 int RequirementCounter = 0;
1804 std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
1808 using StateImagesPairT =
1809 std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
1810 using KernelImageMapT =
1811 std::map<kernel_id, StateImagesPairT, LessByNameComp>;
1812 KernelImageMapT KernelImageMap;
1813 if (!KernelIDs.empty())
1814 for (
const kernel_id &KernelID : KernelIDs)
1815 KernelImageMap.insert({KernelID, {}});
1817 for (RTDeviceBinaryImage *BinImage : BinImages) {
1822 auto InsertRes = ImageInfoMap.insert({BinImage, {}});
1823 DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
1824 if (InsertRes.second) {
1828 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1829 ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
1833 const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
1835 int &ImgRequirementCounter = ImgInfo.RequirementCounter;
1838 if (!ImageKernelIDs || ImageKernelIDs->empty())
1842 for (kernel_id &KernelID : *ImageKernelIDs) {
1843 StateImagesPairT *StateImagesPair;
1845 if (!KernelIDs.empty()) {
1846 auto It = KernelImageMap.find(KernelID);
1847 if (It == KernelImageMap.end())
1849 StateImagesPair = &It->second;
1851 StateImagesPair = &KernelImageMap[KernelID];
1853 auto &[KernelImagesState, KernelImages] = *StateImagesPair;
1855 if (KernelImages.empty()) {
1856 KernelImagesState = ImgState;
1857 KernelImages.push_back(BinImage);
1858 ++ImgRequirementCounter;
1859 }
else if (KernelImagesState < ImgState) {
1860 for (RTDeviceBinaryImage *Img : KernelImages) {
1861 auto It = ImageInfoMap.find(Img);
1862 assert(It != ImageInfoMap.end());
1863 assert(It->second.RequirementCounter > 0);
1864 --(It->second.RequirementCounter);
1866 KernelImages.clear();
1867 KernelImages.push_back(BinImage);
1868 KernelImagesState = ImgState;
1869 ++ImgRequirementCounter;
1870 }
else if (KernelImagesState == ImgState) {
1871 KernelImages.push_back(BinImage);
1872 ++ImgRequirementCounter;
1878 for (
const auto &ImgInfoPair : ImageInfoMap) {
1879 if (ImgInfoPair.second.RequirementCounter == 0)
1883 ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
1884 ImgInfoPair.second.KernelIDs,
nullptr);
1886 SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
1889 return SYCLDeviceImages;
1892 void ProgramManager::bringSYCLDeviceImagesToState(
1893 std::vector<device_image_plain> &DeviceImages,
bundle_state TargetState) {
1898 switch (TargetState) {
1913 switch (DevImageState) {
1919 std::vector<device_image_plain> LinkedDevImages =
1924 assert(LinkedDevImages.size() == 1 &&
"Expected one linked image here");
1925 DevImage = LinkedDevImages[0];
1939 std::vector<device_image_plain>
1940 ProgramManager::getSYCLDeviceImages(
const context &Ctx,
1941 const std::vector<device> &Devs,
1944 std::vector<device_image_plain> DeviceImages =
1945 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1947 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1948 return DeviceImages;
1951 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1952 const context &Ctx,
const std::vector<device> &Devs,
1955 std::vector<device_image_plain> DeviceImages =
1956 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1959 auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
1961 return !Selector(getSyclObjImpl(Image));
1963 DeviceImages.erase(It, DeviceImages.end());
1967 return DeviceImages;
1970 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
1971 const context &Ctx,
const std::vector<device> &Devs,
1972 const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState) {
1974 if (KernelIDs.empty())
1978 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1980 for (
auto &It : m_BuiltInKernelIDs) {
1981 if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
1984 "Attempting to use a built-in kernel. They are "
1985 "not fully supported");
1990 std::vector<device_image_plain> DeviceImages =
1991 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
1994 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
1995 return DeviceImages;
2005 std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
2006 const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
2007 &SpecConstData = InputImpl->get_spec_const_data_ref();
2008 const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
2011 for (
const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
2012 std::ignore = SpecConstNames;
2014 if (SpecIDDesc.IsSet) {
2016 Prog, SpecIDDesc.ID, SpecIDDesc.Size,
2017 SpecConsts.data() + SpecIDDesc.BlobOffset);
2025 const std::vector<device> &Devs,
2032 const std::shared_ptr<device_image_impl> &InputImpl =
2039 if (InputImpl->get_bin_image_ref()->getFormat() !=
2042 sycl::runtime_error(
2043 "Creating a program from AOT binary for multiple device is not "
2045 PI_ERROR_INVALID_OPERATION);
2050 *InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs[0]);
2052 if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2056 InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2058 InputImpl->get_spec_const_data_ref(),
2059 InputImpl->get_spec_const_blob_ref());
2061 std::vector<pi_device> PIDevices;
2062 PIDevices.reserve(Devs.size());
2063 for (
const device &Dev : Devs)
2067 std::string CompileOptions;
2070 CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2073 ObjectImpl->get_program_ref(), Devs.size(),
2074 PIDevices.data(), CompileOptions.c_str(),
2078 if (Error != PI_SUCCESS)
2081 getProgramBuildLog(ObjectImpl->get_program_ref(),
2084 return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2087 std::vector<device_image_plain>
2089 const std::vector<device> &Devs,
2093 std::vector<pi_program> PIPrograms;
2094 PIPrograms.push_back(
getSyclObjImpl(DeviceImage)->get_program_ref());
2096 std::vector<pi_device> PIDevices;
2097 PIDevices.reserve(Devs.size());
2098 for (
const device &Dev : Devs)
2101 std::string LinkOptionsStr;
2103 if (LinkOptionsStr.empty()) {
2104 const std::shared_ptr<device_image_impl> &InputImpl =
2107 *(InputImpl->get_bin_image_ref()));
2111 const PluginPtr &Plugin = ContextImpl->getPlugin();
2116 ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(),
2117 LinkOptionsStr.c_str(), PIPrograms.size(),
2120 nullptr, &LinkedProg);
2122 if (Error != PI_SUCCESS) {
2124 const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2127 Plugin->reportPiError(Error,
"link()");
2130 std::shared_ptr<std::vector<kernel_id>> KernelIDs{
new std::vector<kernel_id>};
2131 std::vector<unsigned char> NewSpecConstBlob;
2134 std::shared_ptr<device_image_impl> DeviceImageImpl =
2138 KernelIDs->insert(KernelIDs->end(),
2139 DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2140 DeviceImageImpl->get_kernel_ids_ptr()->end());
2146 const std::lock_guard<std::mutex> SpecConstLock(
2147 DeviceImageImpl->get_spec_const_data_lock());
2151 for (
const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2152 std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2153 NewSpecConstMap[SpecConstIt.first];
2154 assert(NewDescEntries.empty() &&
2155 "Specialization constant already exists in the map.");
2156 NewDescEntries.reserve(SpecConstIt.second.size());
2158 SpecConstIt.second) {
2160 NewSpecConstDesc.
BlobOffset += NewSpecConstBlob.size();
2161 NewDescEntries.push_back(std::move(NewSpecConstDesc));
2167 NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2168 DeviceImageImpl->get_spec_const_blob_ref().begin(),
2169 DeviceImageImpl->get_spec_const_blob_ref().end());
2177 std::make_shared<detail::device_image_impl>(
2179 LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2183 return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2191 const std::vector<device> &Devs,
2195 const std::shared_ptr<device_image_impl> &InputImpl =
2198 const context Context = InputImpl->get_context();
2204 std::string CompileOpts;
2205 std::string LinkOpts;
2211 SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2214 auto BuildF = [
this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2215 &InputImpl, SpecConsts] {
2217 const PluginPtr &Plugin = ContextImpl->getPlugin();
2221 if (InputImpl->get_bin_image_ref()->getFormat() !=
2224 sycl::runtime_error(
2225 "Creating a program from AOT binary for multiple device is not "
2227 PI_ERROR_INVALID_OPERATION);
2231 auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
2232 Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2234 if (!DeviceCodeWasInCache &&
2235 InputImpl->get_bin_image_ref()->supportsSpecConstants())
2238 ProgramPtr ProgramManaged(
2239 NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
2246 uint32_t DeviceLibReqMask = 0;
2249 DeviceLibReqMask = getDeviceLibReqMask(Img);
2251 ProgramPtr BuiltProgram =
2252 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2258 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2259 NativePrograms[BuiltProgram.get()] = &Img;
2262 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2265 if (!DeviceCodeWasInCache)
2266 PersistentDeviceCodeCache::putItemToDisc(
2267 Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
2269 return BuiltProgram.release();
2272 uint32_t ImgId = Img.getImageID();
2276 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId),
2277 std::make_pair(
PiDevice, CompileOpts + LinkOpts));
2281 auto GetCachedBuildF = [&Cache, &CacheKey]() {
2287 getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
2288 Cache, GetCachedBuildF, BuildF);
2290 assert(BuildResult !=
nullptr &&
"Invalid build result");
2297 const PluginPtr &Plugin = ContextImpl->getPlugin();
2298 auto CacheOtherDevices = [ResProgram, &Plugin]() {
2305 for (
size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2310 CacheKey.second.first = PiDeviceAdd;
2311 getOrBuild<sycl::detail::pi::PiProgram, compile_program_error>(
2312 Cache, GetCachedBuildF, CacheOtherDevices);
2314 assert(BuildResult !=
nullptr &&
"Invalid build result");
2324 InputImpl->get_kernel_ids_ptr(), ResProgram,
2325 InputImpl->get_spec_const_data_ref(),
2326 InputImpl->get_spec_const_blob_ref());
2328 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2331 std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
2332 ProgramManager::getOrCreateKernel(
const context &Context,
2333 const std::string &KernelName,
2343 auto BuildF = [
this, &Program, &KernelName, &Ctx] {
2346 const PluginPtr &Plugin = Ctx->getPlugin();
2356 getEliminatedKernelArgMask(Program, KernelName);
2361 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2366 getOrBuild<KernelProgramCache::KernelArgMaskPairT, invalid_object_error>(
2367 Cache, GetCachedBuildF, BuildF);
2369 assert(BuildResult !=
nullptr &&
"Invalid build result");
2371 &(BuildResult->MBuildResultMutex),
2372 BuildResult->Ptr.load()->second);
2381 #define __SYCL_ASPECT(ASPECT, ID) \
2382 case aspect::ASPECT: \
2384 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2387 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2388 switch (AspectNum) {
2389 #include <sycl/info/aspects.def>
2390 #include <sycl/info/aspects_deprecated.def>
2394 std::to_string(
static_cast<unsigned>(AspectNum)));
2395 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2396 #undef __SYCL_ASPECT_DEPRECATED
2397 #undef __SYCL_ASPECT
2401 template <
typename T>
2402 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2412 std::optional<sycl::exception>
2415 auto getPropIt = [&Img](
const std::string &PropName) {
2419 PropRange.
begin(), PropRange.
end(),
2421 return (*Prop)->Name == PropName;
2423 return (PropIt == PropRange.
end())
2429 auto AspectsPropIt = getPropIt(
"aspects");
2430 auto ReqdWGSizeUint32TPropIt = getPropIt(
"reqd_work_group_size");
2431 auto ReqdWGSizeUint64TPropIt = getPropIt(
"reqd_work_group_size_uint64_t");
2432 auto ReqdSubGroupSizePropIt = getPropIt(
"reqd_sub_group_size");
2435 if (AspectsPropIt) {
2440 while (!Aspects.
empty()) {
2441 aspect Aspect = Aspects.
consume<aspect>();
2442 if (!Dev.
has(Aspect))
2445 " is not supported on the device");
2450 if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2457 bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
2458 auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2463 uint64_t ReqdWGSizeAllDimsTotal = 1;
2464 std::vector<uint64_t> ReqdWGSizeVec;
2466 while (!ReqdWGSize.
empty()) {
2467 uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.
consume<uint64_t>()
2468 : ReqdWGSize.
consume<uint32_t>();
2471 ReqdWGSizeAllDimsTotal = *res;
2474 sycl::errc::kernel_not_supported,
2475 "Required work-group size is not supported"
2476 " (total number of work-items requested can't fit into size_t)");
2477 ReqdWGSizeVec.push_back(SingleDimSize);
2484 if (ReqdWGSizeAllDimsTotal >
2485 Dev.
get_info<info::device::max_work_group_size>())
2487 "Required work-group size " +
2488 std::to_string(ReqdWGSizeAllDimsTotal) +
2489 " is not supported on the device");
2492 std::variant<id<1>,
id<2>,
id<3>> MaxWorkItemSizesVariant;
2494 MaxWorkItemSizesVariant =
2497 MaxWorkItemSizesVariant =
2500 MaxWorkItemSizesVariant =
2502 for (
int i = 0; i < Dims; i++) {
2507 if (ReqdWGSizeVec[i] >
2510 "Required work-group size " +
2511 std::to_string(ReqdWGSizeVec[i]) +
2512 " is not supported");
2513 }
else if (Dims == 2) {
2514 if (ReqdWGSizeVec[i] >
2517 "Required work-group size " +
2518 std::to_string(ReqdWGSizeVec[i]) +
2519 " is not supported");
2521 if (ReqdWGSizeVec[i] >
2524 "Required work-group size " +
2525 std::to_string(ReqdWGSizeVec[i]) +
2526 " is not supported");
2531 if (ReqdSubGroupSizePropIt) {
2532 auto ReqdSubGroupSize =
2534 auto SupportedSubGroupSizes = Dev.
get_info<info::device::sub_group_sizes>();
2541 SupportedSubGroupSizes.cend(),
2542 [=](
auto s) { return s == ReqdSubGroupSize; }))
2545 std::to_string(ReqdSubGroupSize) +
2546 " is not supported on the device");
2557 sycl::detail::ProgramManager::getInstance().addImages(desc);