49 inline namespace _V1 {
52 using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
56 static constexpr
char UseSpvEnv[](
"SYCL_USE_KERNEL_SPV");
63 constexpr
char SpecValue = 1;
64 ur_specialization_constant_info_t SpecConstInfo = {
66 Plugin->call(urProgramSetSpecializationConstants, Prog, 1, &SpecConstInfo);
74 static ur_program_handle_t
76 const unsigned char *Data,
size_t DataLen,
77 const std::vector<ur_program_metadata_t> Metadata) {
78 const PluginPtr &Plugin = Context->getPlugin();
80 uint32_t NumDevices = 0;
81 Plugin->call(urContextGetInfo, Context->getHandleRef(),
82 UR_CONTEXT_INFO_NUM_DEVICES,
sizeof(NumDevices), &NumDevices,
84 assert(NumDevices > 0 &&
85 "Only a single device is supported for AOT compilation");
88 ur_program_handle_t Program;
89 ur_device_handle_t UrDevice =
getSyclObjImpl(Device)->getHandleRef();
90 ur_result_t BinaryStatus = UR_RESULT_SUCCESS;
91 ur_program_properties_t Properties = {};
92 Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES;
93 Properties.pNext =
nullptr;
94 Properties.count = Metadata.size();
95 Properties.pMetadatas = Metadata.data();
96 Plugin->call(urProgramCreateWithBinary, Context->getHandleRef(), UrDevice,
97 DataLen, Data, &Properties, &Program);
99 if (BinaryStatus != UR_RESULT_SUCCESS) {
102 "Creating program with binary failed."),
110 const unsigned char *Data,
112 ur_program_handle_t Program =
nullptr;
113 const PluginPtr &Plugin = Context->getPlugin();
114 Plugin->call(urProgramCreateWithIL, Context->getHandleRef(), Data, DataLen,
136 for (
const device &D : Devices) {
137 if (!D.get_info<info::device::is_compiler_available>())
143 std::string ver = C.
get_platform().get_info<info::platform::version>();
144 if (ver.find(
"OpenCL 1.0") == std::string::npos &&
145 ver.find(
"OpenCL 1.1") == std::string::npos &&
146 ver.find(
"OpenCL 1.2") == std::string::npos &&
147 ver.find(
"OpenCL 2.0") == std::string::npos)
151 for (
const device &D : Devices) {
154 std::vector<std::string> Extensions =
155 D.get_info<info::device::extensions>();
156 if (Extensions.end() ==
157 std::find(Extensions.begin(), Extensions.end(),
"cl_khr_il_program"))
176 assert(
false &&
"Unknown device image format");
184 std::cerr <<
">>> ProgramManager::createPIProgram(" << &Img <<
", "
192 "Malformed device program image descriptor");
196 "Invalid device program image: size is zero");
198 size_t ImgSize = Img.
getSize();
215 sycl::errc::feature_not_supported,
216 "SPIR-V online compilation is not supported in this context");
223 ur_program_handle_t Res =
230 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
232 NativePrograms.insert({Res, &Img});
235 Ctx->addDeviceGlobalInitializer(Res, {Device}, &Img);
250 if (TemporaryStr !=
nullptr) {
251 if (!LinkOpts.empty())
253 LinkOpts += std::string(TemporaryStr);
259 const char *PropName) {
265 const char *PropName) {
267 std::stringstream ss;
271 if (optLevel < 0 || optLevel > 3)
273 ss <<
"-O" << optLevel;
274 std::string temp = ss.str();
288 if (!RegAllocModeProp && !GRFSizeProp)
292 assert(!RegAllocModeProp || !GRFSizeProp);
293 bool Is256GRF =
false;
294 bool IsAutoGRF =
false;
295 if (RegAllocModeProp) {
296 uint32_t RegAllocModePropVal =
298 Is256GRF = RegAllocModePropVal ==
300 IsAutoGRF = RegAllocModePropVal ==
305 Is256GRF = GRFSizePropVal == 256;
306 IsAutoGRF = GRFSizePropVal == 0;
309 if (!CompileOpts.empty())
312 CompileOpts += IsEsimdImage ?
"-doubleGRF" :
"-ze-opt-large-register-file";
315 if (!CompileOpts.empty())
318 CompileOpts +=
"-ze-intel-enable-auto-large-GRF-mode";
324 const std::vector<device> &Devs,
329 static const char *CompileOptsEnv =
333 if (!CompileOptsEnv) {
334 if (!CompileOpts.empty())
337 if (TemporaryStr !=
nullptr)
338 CompileOpts += std::string(TemporaryStr);
344 if (!CompileOpts.empty())
346 CompileOpts +=
"-vc-codegen";
350 CompileOpts +=
" -disable-finalizer-msg";
359 const char *optLevelStr = str.c_str();
364 if (!isEsimdImage && !CompileOptsEnv && optLevelStr !=
nullptr &&
365 optLevelStr[0] !=
'\0') {
367 assert(!Devs.empty() &&
369 return Dev.get_platform() == Devs[0].get_platform();
371 const char *backend_option =
nullptr;
374 PlatformImpl->getBackendOption(optLevelStr, &backend_option);
375 if (backend_option && backend_option[0] !=
'\0') {
376 if (!CompileOpts.empty())
378 CompileOpts += std::string(backend_option);
385 return Dev.is_gpu() &&
386 Dev.get_info<info::device::vendor_id>() == 0x8086;
388 if (!CompileOptsEnv) {
389 static const char *TargetCompileFast =
"-ftarget-compile-fast";
390 if (
auto Pos = CompileOpts.find(TargetCompileFast);
391 Pos != std::string::npos) {
392 const char *BackendOption =
nullptr;
394 PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
395 auto OptLen = strlen(TargetCompileFast);
396 if (IsIntelGPU && BackendOption && BackendOption[0] !=
'\0')
397 CompileOpts.replace(Pos, OptLen, BackendOption);
399 CompileOpts.erase(Pos, OptLen);
401 static const std::string TargetRegisterAllocMode =
402 "-ftarget-register-alloc-mode=";
403 auto OptPos = CompileOpts.find(TargetRegisterAllocMode);
404 while (OptPos != std::string::npos) {
405 auto EndOfOpt = CompileOpts.find(
" ", OptPos);
407 auto OptValue = CompileOpts.substr(
408 OptPos + TargetRegisterAllocMode.size(),
409 EndOfOpt - OptPos - TargetRegisterAllocMode.size());
410 auto ColonPos = OptValue.find(
":");
411 auto Device = OptValue.substr(0, ColonPos);
412 std::string BackendStrToAdd;
416 (Dev.get_info<ext::intel::info::device::device_id>() &
420 if (Device ==
"pvc" && IsPVC)
421 BackendStrToAdd =
" " + OptValue.substr(ColonPos + 1) +
" ";
424 std::string NewCompileOpts =
425 CompileOpts.substr(0, OptPos) + BackendStrToAdd;
427 if (EndOfOpt != std::string::npos)
428 NewCompileOpts += CompileOpts.substr(EndOfOpt);
429 CompileOpts = NewCompileOpts;
430 OptPos = CompileOpts.find(TargetRegisterAllocMode);
437 static const char *AppendCompileOptsEnv =
439 if (AppendCompileOptsEnv) {
440 if (!CompileOpts.empty())
442 CompileOpts += AppendCompileOptsEnv;
446 static const char *AppendLinkOptsEnv =
448 if (AppendLinkOptsEnv) {
449 if (!LinkOpts.empty())
451 LinkOpts += AppendLinkOptsEnv;
456 std::string &LinkOpts,
458 const std::vector<device> &Devices,
467 static const char *CompileOptsEnv =
469 if (CompileOptsEnv) {
470 CompileOpts = CompileOptsEnv;
479 LinkOpts = LinkOptsEnv;
484 std::string &LinkOpts) {
492 const std::vector<const RTDeviceBinaryImage *> &AllImages,
494 const std::string &CompileAndLinkOptions,
SerializedObj SpecConsts) {
495 ur_program_handle_t NativePrg;
498 Device, AllImages, SpecConsts, CompileAndLinkOptions);
499 if (BinProg.size()) {
501 std::vector<ur_program_metadata_t> ProgMetadataVector;
503 auto ProgMetadata = Img->getProgramMetadata();
504 for (
const auto &Prop : ProgMetadata) {
505 ProgMetadataVector.push_back(
511 (
const unsigned char *)BinProg[0].data(),
512 BinProg[0].size(), ProgMetadataVector);
516 return {NativePrg, BinProg.size()};
524 std::string ProgramBuildLog =
526 std::clog << ProgramBuildLog << std::endl;
532 return UR_DEVICE_BINARY_TARGET_UNKNOWN;
534 return UR_DEVICE_BINARY_TARGET_SPIRV32;
536 return UR_DEVICE_BINARY_TARGET_SPIRV64;
537 else if (strcmp(URDeviceTarget,
539 return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64;
542 return UR_DEVICE_BINARY_TARGET_SPIRV64_GEN;
543 else if (strcmp(URDeviceTarget,
545 return UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA;
547 return UR_DEVICE_BINARY_TARGET_NVPTX64;
549 return UR_DEVICE_BINARY_TARGET_AMDGCN;
554 return UR_DEVICE_BINARY_TARGET_UNKNOWN;
559 const std::shared_ptr<detail::device_impl> &DeviceImpl =
561 auto &Plugin = DeviceImpl->getPlugin();
563 const ur_device_handle_t &URDeviceHandle = DeviceImpl->getHandleRef();
572 ur_device_binary_t UrBinary{};
576 Plugin->call_nocheck(urDeviceSelectBinary, URDeviceHandle, &UrBinary,
577 (uint32_t)1, &SuitableImageID);
578 if (Error != UR_RESULT_SUCCESS && Error != UR_RESULT_ERROR_INVALID_BINARY)
580 "Invalid binary image or device"),
583 return (0 == SuitableImageID);
586 std::set<RTDeviceBinaryImage *>
587 ProgramManager::collectDeviceImageDepsForImportedSymbols(
588 const RTDeviceBinaryImage &MainImg,
device Dev) {
589 std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
590 std::set<std::string> HandledSymbols;
591 std::queue<std::string> WorkList;
593 MainImg.getImportedSymbols()) {
594 WorkList.push(ISProp->Name);
595 HandledSymbols.insert(ISProp->Name);
600 "Dynamic linking is not supported for AOT compilation yet");
601 while (!WorkList.empty()) {
602 std::string Symbol = WorkList.front();
605 auto Range = m_ExportedSymbolImages.equal_range(Symbol);
607 for (
auto It = Range.first; It != Range.second; ++It) {
608 RTDeviceBinaryImage *Img = It->second;
609 if (Img->getFormat() != Format ||
613 DeviceImagesToLink.insert(Img);
616 Img->getImportedSymbols()) {
617 if (HandledSymbols.insert(ISProp->Name).second)
618 WorkList.push(ISProp->Name);
624 "No device image found for external symbol " +
627 DeviceImagesToLink.erase(
const_cast<RTDeviceBinaryImage *
>(&MainImg));
628 return DeviceImagesToLink;
631 std::set<RTDeviceBinaryImage *>
632 ProgramManager::collectDependentDeviceImagesForVirtualFunctions(
633 const RTDeviceBinaryImage &Img, device Dev) {
638 std::set<RTDeviceBinaryImage *> DeviceImagesToLink;
643 std::set<std::string> HandledSets;
644 std::queue<std::string> WorkList;
646 std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
650 assert(std::string(VFProp->Name) ==
"uses-virtual-functions-set" &&
651 "Unexpected virtual function property");
653 WorkList.push(SetName);
654 HandledSets.insert(SetName);
658 while (!WorkList.empty()) {
659 std::string SetName = WorkList.front();
665 for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage[SetName]) {
671 BinImage->getVirtualFunctions()) {
672 std::string StrValue = DeviceBinaryProperty(VFProp).asCString();
674 if (HandledSets.insert(SetName).second)
675 WorkList.push(SetName);
686 DeviceImagesToLink.insert(BinImage);
693 DeviceImagesToLink.erase(
const_cast<RTDeviceBinaryImage *
>(&Img));
695 return DeviceImagesToLink;
700 ur_program_handle_t Prog,
const PluginPtr &Plugin) {
704 std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
705 const std::map<std::string, std::vector<device_image_impl::SpecConstDescT>>
706 &SpecConstData = InputImpl->get_spec_const_data_ref();
707 const SerializedObj &SpecConsts = InputImpl->get_spec_const_blob_ref();
710 for (
const auto &[SpecConstNames, SpecConstDescs] : SpecConstData) {
711 std::ignore = SpecConstNames;
713 if (SpecIDDesc.IsSet) {
714 ur_specialization_constant_info_t SpecConstInfo = {
715 SpecIDDesc.ID, SpecIDDesc.Size,
716 SpecConsts.data() + SpecIDDesc.BlobOffset};
717 Plugin->call(urProgramSetSpecializationConstants, Prog, 1,
728 const std::string &KernelName,
const NDRDescT &NDRDesc,
729 bool JITCompilationIsRequired) {
732 std::string CompileOpts;
733 std::string LinkOpts;
742 while (!RootDevImpl->isRootDevice()) {
744 RootDevImpl->get_info<info::device::parent_device>());
746 if (!ContextImpl->hasDevice(ParentDev))
748 RootDevImpl = ParentDev;
751 ur_bool_t MustBuildOnSubdevice =
true;
752 ContextImpl->getPlugin()->call(urDeviceGetInfo, RootDevImpl->getHandleRef(),
753 UR_DEVICE_INFO_BUILD_ON_SUBDEVICE,
754 sizeof(ur_bool_t), &MustBuildOnSubdevice,
757 DeviceImplPtr Dev = (MustBuildOnSubdevice ==
true) ? DeviceImpl : RootDevImpl;
758 auto Context = createSyclObjFromImpl<context>(ContextImpl);
759 auto Device = createSyclObjFromImpl<device>(Dev);
761 getDeviceImage(KernelName, Context, Device, JITCompilationIsRequired);
770 std::set<RTDeviceBinaryImage *> DeviceImagesToLink =
771 collectDependentDeviceImagesForVirtualFunctions(Img, Device);
773 std::set<RTDeviceBinaryImage *> ImageDeps =
774 collectDeviceImageDepsForImportedSymbols(Img, Device);
775 DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end());
777 std::vector<const RTDeviceBinaryImage *> AllImages;
778 AllImages.reserve(ImageDeps.size() + 1);
779 AllImages.push_back(&Img);
780 std::copy(ImageDeps.begin(), ImageDeps.end(), std::back_inserter(AllImages));
782 auto BuildF = [
this, &Img, &Context, &ContextImpl, &Device, &CompileOpts,
783 &LinkOpts, SpecConsts, &DeviceImagesToLink, &AllImages] {
784 const PluginPtr &Plugin = ContextImpl->getPlugin();
790 Img, AllImages, Context, Device, CompileOpts + LinkOpts, SpecConsts);
792 if (!DeviceCodeWasInCache) {
797 ProgramPtr ProgramManaged(NativePrg, urProgramRelease);
805 uint32_t DeviceLibReqMask = 0;
806 if (!DeviceCodeWasInCache &&
811 std::vector<ur_program_handle_t> ProgramsToLink;
814 if (!DeviceCodeWasInCache) {
818 const std::shared_ptr<detail::device_image_impl> &DeviceImageImpl =
822 DeviceImageImpl->get_spec_const_blob_ref();
824 ur_program_handle_t NativePrg =
827 if (BinImg->supportsSpecConstants())
830 ProgramsToLink.push_back(NativePrg);
833 ProgramPtr BuiltProgram =
834 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
838 for (ur_program_handle_t Prg : ProgramsToLink)
839 Plugin->call(urProgramRelease, Prg);
844 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
845 NativePrograms.insert({BuiltProgram.get(), &Img});
847 NativePrograms.insert({BuiltProgram.get(), LinkedImg});
851 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img);
854 if (!DeviceCodeWasInCache) {
856 CompileOpts + LinkOpts,
859 return BuiltProgram.release();
863 const ur_device_handle_t UrDevice = Dev->getHandleRef();
865 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice);
867 auto GetCachedBuildF = [&Cache, &CacheKey]() {
876 assert(BuildResult !=
nullptr &&
"Invalid build result");
878 ur_program_handle_t ResProgram = BuildResult->Val;
879 auto Plugin = ContextImpl->getPlugin();
886 CacheKey.first.second = BImg->getImageID();
890 Plugin->call(urProgramRetain, ResProgram);
898 ContextImpl->getPlugin()->call(urProgramRetain, ResProgram);
904 std::tuple<ur_kernel_handle_t, std::mutex *,
const KernelArgMask *,
908 const std::string &KernelName,
911 std::cerr <<
">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get()
912 <<
", " << DeviceImpl.get() <<
", " << KernelName <<
")\n";
919 std::string CompileOpts, LinkOpts;
925 ur_device_handle_t UrDevice = DeviceImpl->getHandleRef();
928 CompileOpts + LinkOpts, KernelName);
931 constexpr
size_t Kernel = 0;
932 constexpr
size_t Program = 3;
933 if (std::get<Kernel>(ret_tuple)) {
936 ContextImpl->getPlugin()->call(urKernelRetain,
937 std::get<Kernel>(ret_tuple));
938 ContextImpl->getPlugin()->call(urProgramRetain,
939 std::get<Program>(ret_tuple));
944 ur_program_handle_t Program =
947 auto BuildF = [
this, &Program, &KernelName, &ContextImpl] {
948 ur_kernel_handle_t
Kernel =
nullptr;
950 const PluginPtr &Plugin = ContextImpl->getPlugin();
952 KernelName.c_str(), &
Kernel);
955 if (ContextImpl->getPlatformImpl()->supports_usm()) {
958 const ur_bool_t UrTrue =
true;
959 Plugin->call(urKernelSetExecInfo,
Kernel,
960 UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS,
sizeof(ur_bool_t),
967 return std::make_pair(
Kernel, ArgMask);
970 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
978 auto [
Kernel, ArgMask] = BuildF();
984 assert(BuildResult !=
nullptr &&
"Invalid build result");
985 const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val;
987 &(BuildResult->MBuildResultMutex),
988 KernelArgMaskPair.second, Program);
993 ContextImpl->getPlugin()->call(urKernelRetain, KernelArgMaskPair.first);
1001 ur_program_handle_t Program;
1002 const PluginPtr &Plugin = Context->getPlugin();
1003 Plugin->call(urKernelGetInfo,
Kernel, UR_KERNEL_INFO_PROGRAM,
1004 sizeof(ur_program_handle_t), &Program,
nullptr);
1011 size_t URDevicesSize = 0;
1012 const PluginPtr &Plugin = Context->getPlugin();
1013 Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES, 0,
nullptr,
1015 std::vector<ur_device_handle_t> URDevices(URDevicesSize /
1016 sizeof(ur_device_handle_t));
1017 Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES,
1018 URDevicesSize, URDevices.data(),
nullptr);
1019 std::string Log =
"The program was built for " +
1020 std::to_string(URDevices.size()) +
" devices";
1021 for (ur_device_handle_t &Device : URDevices) {
1022 std::string DeviceBuildInfoString;
1023 size_t DeviceBuildInfoStrSize = 0;
1024 Plugin->call(urProgramGetBuildInfo, Program, Device,
1025 UR_PROGRAM_BUILD_INFO_LOG, 0,
nullptr,
1026 &DeviceBuildInfoStrSize);
1027 if (DeviceBuildInfoStrSize > 0) {
1028 std::vector<char> DeviceBuildInfo(DeviceBuildInfoStrSize);
1029 Plugin->call(urProgramGetBuildInfo, Program, Device,
1030 UR_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize,
1031 DeviceBuildInfo.data(),
nullptr);
1032 DeviceBuildInfoString = std::string(DeviceBuildInfo.data());
1035 std::string DeviceNameString;
1036 size_t DeviceNameStrSize = 0;
1037 Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME, 0,
nullptr,
1038 &DeviceNameStrSize);
1039 if (DeviceNameStrSize > 0) {
1040 std::vector<char> DeviceName(DeviceNameStrSize);
1041 Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME,
1042 DeviceNameStrSize, DeviceName.data(),
nullptr);
1043 DeviceNameString = std::string(DeviceName.data());
1045 Log +=
"\nBuild program log for '" + DeviceNameString +
"':\n" +
1046 DeviceBuildInfoString;
1055 ur_program_handle_t &Prog) {
1058 std::ifstream::in | std::ifstream::binary);
1063 File.seekg(0, std::ios::end);
1064 size_t FileSize = File.tellg();
1065 File.seekg(0, std::ios::beg);
1066 std::vector<char> FileContent(FileSize);
1067 File.read(&FileContent[0], FileSize);
1072 return Prog !=
nullptr;
1077 static const std::map<DeviceLibExt, std::pair<const char *, const char *>>
1080 {
nullptr,
"libsycl-fallback-cassert.spv"}},
1082 {
nullptr,
"libsycl-fallback-cmath.spv"}},
1084 {
nullptr,
"libsycl-fallback-cmath-fp64.spv"}},
1086 {
nullptr,
"libsycl-fallback-complex.spv"}},
1088 {
nullptr,
"libsycl-fallback-complex-fp64.spv"}},
1090 {
nullptr,
"libsycl-fallback-cstring.spv"}},
1092 {
nullptr,
"libsycl-fallback-imf.spv"}},
1094 {
nullptr,
"libsycl-fallback-imf-fp64.spv"}},
1096 {
nullptr,
"libsycl-fallback-imf-bf16.spv"}},
1098 {
"libsycl-native-bfloat16.spv",
"libsycl-fallback-bfloat16.spv"}}};
1102 const char *Lib =
nullptr;
1104 Lib = Native ? LibPair->second.first : LibPair->second.second;
1107 "Unhandled (new?) device library extension");
1118 "cl_intel_devicelib_math_fp64"},
1121 "cl_intel_devicelib_complex_fp64"},
1127 "cl_intel_bfloat16_conversions"}};
1133 "Unhandled (new?) device library extension");
1138 ur_program_handle_t Program, uint32_t NumDevs,
1139 ur_device_handle_t *Devs, ur_context_handle_t Ctx,
1144 Plugin->call_nocheck(urProgramCompileExp, Program, NumDevs, Devs, Opts);
1145 if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1146 return Plugin->call_nocheck(urProgramCompile, Ctx, Program, Opts);
1153 ur_device_handle_t Device,
1154 bool UseNativeLib) {
1158 auto LockedCache = Context->acquireCachedLibPrograms();
1159 auto CachedLibPrograms = LockedCache.get();
1160 auto CacheResult = CachedLibPrograms.emplace(
1161 std::make_pair(std::make_pair(Extension, Device),
nullptr));
1162 bool Cached = !CacheResult.second;
1163 auto LibProgIt = CacheResult.first;
1164 ur_program_handle_t &LibProg = LibProgIt->second;
1170 CachedLibPrograms.erase(LibProgIt);
1172 std::string(
"Failed to load ") + LibFileName);
1175 const PluginPtr &Plugin = Context->getPlugin();
1181 doCompile(Plugin, LibProg, 1, &Device, Context->getHandleRef(),
"");
1182 if (Error != UR_RESULT_SUCCESS) {
1183 CachedLibPrograms.erase(LibProgIt);
1194 const char *SpvFile = std::getenv(
UseSpvEnv);
1198 m_UseSpvFile =
true;
1201 std::ifstream File(SpvFile, std::ios::binary);
1203 if (!File.is_open())
1205 std::string(
"Can't open file specified via ") +
1207 File.seekg(0, std::ios::end);
1208 size_t Size = File.tellg();
1209 std::unique_ptr<char[]> Data(
new char[Size]);
1211 File.read(Data.get(), Size);
1215 std::string(
"read from ") + SpvFile +
1216 std::string(
" failed"));
1221 std::make_unique<DynRTDeviceBinaryImage>(std::move(Data), Size);
1224 std::cerr <<
"loaded device image binary from " << SpvFile <<
"\n";
1232 bool JITCompilationIsRequired) {
1233 if (!JITCompilationIsRequired)
1244 "Recompiling AOT image is not supported");
1248 template <
typename StorageKey>
1250 const std::unordered_multimap<StorageKey, RTDeviceBinaryImage *> &ImagesSet,
1251 const StorageKey &Key,
const context &Context,
const device &Device) {
1252 auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key);
1253 if (ItBegin == ItEnd)
1256 std::vector<sycl_device_binary> RawImgs(std::distance(ItBegin, ItEnd));
1258 for (
unsigned I = 0; It != ItEnd; ++It, ++I)
1262 std::vector<ur_device_binary_t> UrBinaries(RawImgs.size());
1263 for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) {
1264 UrBinaries[BinaryCount].pDeviceTargetSpec =
1268 uint32_t ImgInd = 0;
1273 UrBinaries.data(), UrBinaries.size(), &ImgInd);
1274 std::advance(ItBegin, ImgInd);
1275 return ItBegin->second;
1278 RTDeviceBinaryImage &
1281 bool JITCompilationIsRequired) {
1283 std::cerr <<
">>> ProgramManager::getDeviceImage(\"" << KernelName <<
"\", "
1286 << JITCompilationIsRequired <<
")\n";
1288 std::cerr <<
"available device images:\n";
1293 assert(m_SpvFileImage);
1295 std::unordered_set<RTDeviceBinaryImage *>({m_SpvFileImage.get()}),
1296 Context, Device, JITCompilationIsRequired);
1301 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1302 if (
auto KernelId = m_KernelName2KernelIDs.find(KernelName);
1303 KernelId != m_KernelName2KernelIDs.end()) {
1307 assert(Img &&
"No binary image found for kernel id");
1324 "No kernel named " + KernelName +
" was found");
1328 const std::unordered_set<RTDeviceBinaryImage *> &ImageSet,
1330 bool JITCompilationIsRequired) {
1331 assert(ImageSet.size() > 0);
1334 std::cerr <<
">>> ProgramManager::getDeviceImage(Custom SPV file "
1337 << JITCompilationIsRequired <<
")\n";
1339 std::cerr <<
"available device images:\n";
1343 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1344 std::vector<sycl_device_binary> RawImgs(ImageSet.size());
1345 auto ImageIterator = ImageSet.begin();
1346 for (
size_t i = 0; i < ImageSet.size(); i++, ImageIterator++)
1349 uint32_t ImgInd = 0;
1353 std::vector<ur_device_binary_t> UrBinaries(RawImgs.size());
1354 for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) {
1355 UrBinaries[BinaryCount].pDeviceTargetSpec =
1361 UrBinaries.data(), UrBinaries.size(), &ImgInd);
1363 ImageIterator = ImageSet.begin();
1364 std::advance(ImageIterator, ImgInd);
1369 std::cerr <<
"selected device image: " << &(*ImageIterator)->getRawData()
1371 (*ImageIterator)->print();
1373 return **ImageIterator;
1378 0x1 << (
static_cast<uint32_t
>(Ext) -
1380 return ((DeviceLibReqMask & Mask) == Mask);
1383 static std::vector<ur_program_handle_t>
1385 const ur_device_handle_t &Device,
1386 uint32_t DeviceLibReqMask) {
1387 std::vector<ur_program_handle_t> Programs;
1389 std::pair<DeviceLibExt, bool> RequiredDeviceLibExt[] = {
1404 std::string DevExtList =
1405 Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string(
1407 const bool fp64Support = (DevExtList.npos != DevExtList.find(
"cl_khr_fp64"));
1411 for (
auto &Pair : RequiredDeviceLibExt) {
1413 bool &FallbackIsLoaded = Pair.second;
1415 if (FallbackIsLoaded) {
1432 bool InhibitNativeImpl =
false;
1433 if (
const char *Env = getenv(
"SYCL_DEVICELIB_INHIBIT_NATIVE")) {
1434 InhibitNativeImpl = strstr(Env, ExtName) !=
nullptr;
1437 bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName);
1438 if (!DeviceSupports || InhibitNativeImpl) {
1441 FallbackIsLoaded =
true;
1447 FallbackIsLoaded =
true;
1456 const std::string &CompileOptions,
const std::string &LinkOptions,
1457 ur_device_handle_t Device, uint32_t DeviceLibReqMask,
1458 const std::vector<ur_program_handle_t> &ExtraProgramsToLink) {
1461 std::cerr <<
">>> ProgramManager::build(" << Program.get() <<
", "
1462 << CompileOptions <<
", " << LinkOptions <<
", ... " << Device
1466 bool LinkDeviceLibs = (DeviceLibReqMask != 0);
1471 if (CompileOptions.find(std::string(
"-cmc")) != std::string::npos ||
1472 CompileOptions.find(std::string(
"-vc-codegen")) != std::string::npos)
1473 LinkDeviceLibs =
false;
1475 std::vector<ur_program_handle_t> LinkPrograms;
1476 if (LinkDeviceLibs) {
1480 static const char *ForceLinkEnv = std::getenv(
"SYCL_FORCE_LINK");
1481 static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv ==
'1');
1483 const PluginPtr &Plugin = Context->getPlugin();
1484 if (LinkPrograms.empty() && ExtraProgramsToLink.empty() && !ForceLink) {
1485 const std::string &Options = LinkOptions.empty()
1487 : (CompileOptions +
" " + LinkOptions);
1489 Plugin->call_nocheck(urProgramBuildExp, Program.get(),
1490 1, &Device, Options.c_str());
1491 if (Error == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1492 Error = Plugin->call_nocheck(urProgramBuild, Context->getHandleRef(),
1493 Program.get(), Options.c_str());
1496 if (Error != UR_RESULT_SUCCESS)
1506 auto Res =
doCompile(Plugin, Program.get(), 1, &Device,
1507 Context->getHandleRef(), CompileOptions.c_str());
1509 LinkPrograms.push_back(Program.get());
1511 for (ur_program_handle_t Prg : ExtraProgramsToLink) {
1513 Plugin->call_nocheck(urProgramCompileExp, Prg, 1,
1514 &Device, CompileOptions.c_str());
1515 if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1516 Plugin->call(urProgramCompile, Context->getHandleRef(), Prg,
1517 CompileOptions.c_str());
1519 Plugin->checkUrResult(Result);
1521 LinkPrograms.push_back(Prg);
1524 ur_program_handle_t LinkedProg =
nullptr;
1526 auto Res = Plugin->call_nocheck(urProgramLinkExp, Context->getHandleRef(),
1528 LinkPrograms.size(), LinkPrograms.data(),
1529 LinkOptions.c_str(), &LinkedProg);
1530 if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
1531 Res = Plugin->call_nocheck(urProgramLink, Context->getHandleRef(),
1532 LinkPrograms.size(), LinkPrograms.data(),
1533 LinkOptions.c_str(), &LinkedProg);
1537 ur_result_t Error = doLink();
1538 if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES ||
1539 Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
1540 Context->getKernelProgramCache().reset();
1546 Program.reset(LinkedProg);
1547 if (Error != UR_RESULT_SUCCESS) {
1556 Plugin->checkUrResult(Error);
1561 void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) {
1562 const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
1563 Img.getAssertUsed();
1564 if (AssertUsedRange.isAvailable())
1565 for (
const auto &Prop : AssertUsedRange)
1566 m_KernelUsesAssert.insert(Prop->Name);
1570 return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
1574 const bool DumpImages = std::getenv(
"SYCL_DUMP_IMAGES") && !m_UseSpvFile;
1580 if (EntriesB == EntriesE)
1583 auto Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
1584 static uint32_t SequenceID = 0;
1588 Img->getKernelParamOptInfo();
1590 KernelNameToArgMaskMap &ArgMaskMap =
1591 m_EliminatedKernelArgMasks[Img.get()];
1592 for (
const auto &Info : KPOIRange)
1593 ArgMaskMap[Info->Name] =
1598 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1602 Img->getExportedSymbols()) {
1603 m_ExportedSymbolImages.insert({ESProp->Name, Img.get()});
1608 Img->getVirtualFunctions()) {
1611 m_VFSet2BinImage[SetName].insert(Img.get());
1616 m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(),
1617 [&](
auto &CurrentImg) {
1618 return CurrentImg.first->getFormat() == Img->getFormat();
1620 dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
1623 m_BinImg2KernelIDs[Img.get()].reset(
new std::vector<kernel_id>);
1632 if (std::strstr(EntriesIt->name,
"__sycl_service_kernel__")) {
1633 m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1640 if (m_ExportedSymbolImages.find(EntriesIt->name) !=
1641 m_ExportedSymbolImages.end())
1645 auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1646 if (It == m_KernelName2KernelIDs.end()) {
1647 std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1648 std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1650 detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
1652 It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1654 m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
1655 m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
1658 cacheKernelUsesAssertInfo(*Img);
1663 m_AsanFoundInImage |=
1668 std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
1673 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1675 auto DeviceGlobals = Img->getDeviceGlobals();
1686 auto [TypeSize, DeviceImageScopeDecorated] =
1687 DeviceGlobalInfo.
consume<std::uint32_t, std::uint32_t>();
1688 assert(DeviceGlobalInfo.
empty() &&
"Extra data left!");
1693 auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name);
1694 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1696 ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize,
1697 DeviceImageScopeDecorated);
1702 auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
1703 DeviceGlobal->Name, Img.get(), TypeSize,
1704 DeviceImageScopeDecorated);
1705 m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
1711 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1712 auto HostPipes = Img->getHostPipes();
1722 auto TypeSize = HostPipeInfo.
consume<std::uint32_t>();
1723 assert(HostPipeInfo.
empty() &&
"Extra data left!");
1725 auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name);
1726 if (ExistingHostPipe != m_HostPipes.end()) {
1728 ExistingHostPipe->second->initialize(TypeSize);
1729 ExistingHostPipe->second->initialize(Img.get());
1735 std::make_unique<HostPipeMapEntry>(HostPipe->Name, TypeSize);
1736 EntryUPtr->initialize(Img.get());
1737 m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr));
1741 m_DeviceImages.insert(std::move(Img));
1746 for (
const auto &ImgIt : m_BinImg2KernelIDs) {
1747 ImgIt.first->print();
1752 uint32_t SequenceID)
const {
1753 const char *Prefix = std::getenv(
"SYCL_DUMP_IMAGES_PREFIX");
1754 std::string Fname(Prefix ? Prefix :
"sycl_");
1758 Fname +=
'_' + std::to_string(SequenceID);
1770 std::ofstream F(Fname, std::ios::binary);
1790 const std::string &KernelName) {
1792 if (m_EliminatedKernelArgMasks.empty())
1796 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1797 auto Range = NativePrograms.equal_range(NativePrg);
1798 for (
auto ImgIt = Range.first; ImgIt != Range.second; ++ImgIt) {
1799 auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1800 if (MapIt == m_EliminatedKernelArgMasks.end())
1802 auto ArgMaskMapIt = MapIt->second.find(KernelName);
1803 if (ArgMaskMapIt != MapIt->second.end())
1804 return &MapIt->second[KernelName];
1806 if (Range.first != Range.second)
1812 for (
auto &Elem : m_EliminatedKernelArgMasks) {
1813 auto ArgMask = Elem.second.find(KernelName);
1814 if (ArgMask != Elem.second.end())
1815 return &ArgMask->second;
1823 auto IsAOTBinary = [](
const char *Format) {
1834 return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input;
1838 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1840 auto KernelID = m_KernelName2KernelIDs.find(KernelName);
1841 if (KernelID == m_KernelName2KernelIDs.end())
1843 "No kernel found with the specified name");
1845 return KernelID->second;
1849 std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
1852 m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(),
1854 std::shared_ptr<std::vector<kernel_id>>>
1855 Elem) { return compatibleWithDevice(Elem.first, Dev); });
1859 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1861 std::vector<sycl::kernel_id> AllKernelIDs;
1862 AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
1863 for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
1864 AllKernelIDs.push_back(KernelID.second);
1866 return AllKernelIDs;
1870 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
1872 auto KernelID = m_BuiltInKernelIDs.find(KernelName);
1873 if (KernelID == m_BuiltInKernelIDs.end()) {
1874 auto Impl = std::make_shared<kernel_id_impl>(KernelName);
1875 auto CachedID = createSyclObjFromImpl<kernel_id>(Impl);
1876 KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first;
1879 return KernelID->second;
1883 const char *UniqueId) {
1884 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1886 auto ExistingDeviceGlobal = m_DeviceGlobals.find(UniqueId);
1887 if (ExistingDeviceGlobal != m_DeviceGlobals.end()) {
1889 ExistingDeviceGlobal->second->initialize(DeviceGlobalPtr);
1890 m_Ptr2DeviceGlobal.insert(
1891 {DeviceGlobalPtr, ExistingDeviceGlobal->second.get()});
1896 std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
1897 auto NewEntry = m_DeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
1898 m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
1901 std::set<RTDeviceBinaryImage *>
1903 std::set<RTDeviceBinaryImage *> BinImages;
1904 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1905 for (
const kernel_id &KID : KernelIDs) {
1906 auto Range = m_KernelIDs2BinImage.equal_range(KID);
1907 for (
auto It = Range.first, End = Range.second; It != End; ++It)
1908 BinImages.insert(It->second);
1915 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1916 auto Entry = m_Ptr2DeviceGlobal.find(DeviceGlobalPtr);
1917 assert(Entry != m_Ptr2DeviceGlobal.end() &&
"Device global entry not found");
1918 return Entry->second;
1922 const std::vector<std::string> &UniqueIds,
1923 bool ExcludeDeviceImageScopeDecorated) {
1924 std::vector<DeviceGlobalMapEntry *> FoundEntries;
1925 FoundEntries.reserve(UniqueIds.size());
1927 std::lock_guard<std::mutex> DeviceGlobalsGuard(m_DeviceGlobalsMutex);
1928 for (
const std::string &UniqueId : UniqueIds) {
1929 auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId);
1930 assert(DeviceGlobalEntry != m_DeviceGlobals.end() &&
1931 "Device global not found in map.");
1932 if (!ExcludeDeviceImageScopeDecorated ||
1933 !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)
1934 FoundEntries.push_back(DeviceGlobalEntry->second.get());
1936 return FoundEntries;
1940 const char *UniqueId) {
1941 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1943 auto ExistingHostPipe = m_HostPipes.find(UniqueId);
1944 if (ExistingHostPipe != m_HostPipes.end()) {
1945 ExistingHostPipe->second->initialize(HostPipePtr);
1946 m_Ptr2HostPipe.insert({HostPipePtr, ExistingHostPipe->second.get()});
1950 auto EntryUPtr = std::make_unique<HostPipeMapEntry>(UniqueId, HostPipePtr);
1951 auto NewEntry = m_HostPipes.emplace(UniqueId, std::move(EntryUPtr));
1952 m_Ptr2HostPipe.insert({HostPipePtr, NewEntry.first->second.get()});
1957 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1958 auto Entry = m_HostPipes.find(UniqueId);
1959 assert(Entry != m_HostPipes.end() &&
"Host pipe entry not found");
1960 return Entry->second.get();
1964 std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
1965 auto Entry = m_Ptr2HostPipe.find(HostPipePtr);
1966 assert(Entry != m_Ptr2HostPipe.end() &&
"Host pipe entry not found");
1967 return Entry->second;
1976 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
1979 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
1980 KernelIDs = m_BinImg2KernelIDs[BinImage];
1984 BinImage, Ctx, std::vector<device>{Dev}, ImgState, KernelIDs,
1987 return createSyclObjFromImpl<device_image_plain>(Impl);
1990 std::vector<device_image_plain>
1992 const context &Ctx,
const std::vector<device> &Devs,
1993 bundle_state TargetState,
const std::vector<kernel_id> &KernelIDs) {
1997 std::set<RTDeviceBinaryImage *> BinImages;
1998 if (!KernelIDs.empty()) {
1999 for (
const auto &KID : KernelIDs) {
2000 bool isCompatibleWithAtLeastOneDev =
2001 std::any_of(Devs.begin(), Devs.end(), [&KID](
const auto &Dev) {
2002 return sycl::is_compatible({KID}, Dev);
2004 if (!isCompatibleWithAtLeastOneDev)
2007 "Kernel is incompatible with all devices in devs");
2011 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2012 for (
auto &ImageUPtr : m_BinImg2KernelIDs) {
2013 BinImages.insert(ImageUPtr.first);
2026 for (
auto It = BinImages.begin(); It != BinImages.end();) {
2028 It = BinImages.erase(It);
2033 std::vector<device_image_plain> SYCLDeviceImages;
2041 struct DeviceBinaryImageInfo {
2042 std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
2044 int RequirementCounter = 0;
2046 std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
2050 using StateImagesPairT =
2051 std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
2052 using KernelImageMapT =
2053 std::map<kernel_id, StateImagesPairT, LessByNameComp>;
2054 KernelImageMapT KernelImageMap;
2055 if (!KernelIDs.empty())
2056 for (
const kernel_id &KernelID : KernelIDs)
2057 KernelImageMap.insert({KernelID, {}});
2059 for (RTDeviceBinaryImage *BinImage : BinImages) {
2064 auto InsertRes = ImageInfoMap.insert({BinImage, {}});
2065 DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
2066 if (InsertRes.second) {
2070 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2071 ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
2075 const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
2077 int &ImgRequirementCounter = ImgInfo.RequirementCounter;
2080 if (!ImageKernelIDs || ImageKernelIDs->empty())
2084 for (kernel_id &KernelID : *ImageKernelIDs) {
2085 StateImagesPairT *StateImagesPair;
2087 if (!KernelIDs.empty()) {
2088 auto It = KernelImageMap.find(KernelID);
2089 if (It == KernelImageMap.end())
2091 StateImagesPair = &It->second;
2093 StateImagesPair = &KernelImageMap[KernelID];
2095 auto &[KernelImagesState, KernelImages] = *StateImagesPair;
2097 if (KernelImages.empty()) {
2098 KernelImagesState = ImgState;
2099 KernelImages.push_back(BinImage);
2100 ++ImgRequirementCounter;
2101 }
else if (KernelImagesState < ImgState) {
2102 for (RTDeviceBinaryImage *Img : KernelImages) {
2103 auto It = ImageInfoMap.find(Img);
2104 assert(It != ImageInfoMap.end());
2105 assert(It->second.RequirementCounter > 0);
2106 --(It->second.RequirementCounter);
2108 KernelImages.clear();
2109 KernelImages.push_back(BinImage);
2110 KernelImagesState = ImgState;
2111 ++ImgRequirementCounter;
2112 }
else if (KernelImagesState == ImgState) {
2113 KernelImages.push_back(BinImage);
2114 ++ImgRequirementCounter;
2120 for (
const auto &ImgInfoPair : ImageInfoMap) {
2121 if (ImgInfoPair.second.RequirementCounter == 0)
2125 ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
2126 ImgInfoPair.second.KernelIDs,
nullptr);
2128 SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
2131 return SYCLDeviceImages;
2134 void ProgramManager::bringSYCLDeviceImagesToState(
2135 std::vector<device_image_plain> &DeviceImages,
bundle_state TargetState) {
2144 switch (TargetState) {
2163 switch (DevImageState) {
2174 std::vector<device_image_plain> LinkedDevImages =
2179 assert(LinkedDevImages.size() == 1 &&
"Expected one linked image here");
2180 DevImage = LinkedDevImages[0];
2194 std::vector<device_image_plain>
2195 ProgramManager::getSYCLDeviceImages(
const context &Ctx,
2196 const std::vector<device> &Devs,
2199 std::vector<device_image_plain> DeviceImages =
2200 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
2202 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
2203 return DeviceImages;
2206 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
2207 const context &Ctx,
const std::vector<device> &Devs,
2210 std::vector<device_image_plain> DeviceImages =
2211 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
2214 auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
2216 return !Selector(getSyclObjImpl(Image));
2218 DeviceImages.erase(It, DeviceImages.end());
2222 return DeviceImages;
2225 std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
2226 const context &Ctx,
const std::vector<device> &Devs,
2227 const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState) {
2229 if (KernelIDs.empty())
2233 std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
2235 for (
auto &It : m_BuiltInKernelIDs) {
2236 if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
2239 "Attempting to use a built-in kernel. They are "
2240 "not fully supported");
2245 std::vector<device_image_plain> DeviceImages =
2246 getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
2249 bringSYCLDeviceImagesToState(DeviceImages, TargetState);
2250 return DeviceImages;
2255 const std::vector<device> &Devs,
2262 const std::shared_ptr<device_image_impl> &InputImpl =
2269 if (InputImpl->get_bin_image_ref()->getFormat() !=
2276 "Creating a program from AOT binary for multiple device is "
2281 ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(),
2282 InputImpl->get_context(), Devs[0]);
2284 if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
2288 InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
2290 InputImpl->get_spec_const_data_ref(),
2291 InputImpl->get_spec_const_blob_ref());
2293 std::vector<ur_device_handle_t> URDevices;
2294 URDevices.reserve(Devs.size());
2295 for (
const device &Dev : Devs)
2299 std::string CompileOptions;
2302 CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin);
2306 Plugin, ObjectImpl->get_ur_program_ref(), Devs.size(), URDevices.data(),
2308 CompileOptions.c_str());
2309 if (Error != UR_RESULT_SUCCESS)
2312 getProgramBuildLog(ObjectImpl->get_ur_program_ref(),
2315 return createSyclObjFromImpl<device_image_plain>(ObjectImpl);
2318 std::vector<device_image_plain>
2320 const std::vector<device> &Devs,
2324 std::vector<ur_program_handle_t> URPrograms;
2325 URPrograms.push_back(
getSyclObjImpl(DeviceImage)->get_ur_program_ref());
2327 std::vector<ur_device_handle_t> URDevices;
2328 URDevices.reserve(Devs.size());
2329 for (
const device &Dev : Devs)
2332 std::string LinkOptionsStr;
2334 if (LinkOptionsStr.empty()) {
2335 const std::shared_ptr<device_image_impl> &InputImpl =
2338 *(InputImpl->get_bin_image_ref()));
2344 const PluginPtr &Plugin = ContextImpl->getPlugin();
2346 ur_program_handle_t LinkedProg =
nullptr;
2348 auto Res = Plugin->call_nocheck(
2349 urProgramLinkExp, ContextImpl->getHandleRef(), URDevices.size(),
2350 URDevices.data(), URPrograms.size(), URPrograms.data(),
2351 LinkOptionsStr.c_str(), &LinkedProg);
2352 if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
2353 Res = Plugin->call_nocheck(urProgramLink, ContextImpl->getHandleRef(),
2354 URPrograms.size(), URPrograms.data(),
2355 LinkOptionsStr.c_str(), &LinkedProg);
2359 ur_result_t Error = doLink();
2360 if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES ||
2361 Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
2362 ContextImpl->getKernelProgramCache().reset();
2366 if (Error != UR_RESULT_SUCCESS) {
2368 const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl);
2375 std::shared_ptr<std::vector<kernel_id>> KernelIDs{
new std::vector<kernel_id>};
2376 std::vector<unsigned char> NewSpecConstBlob;
2379 std::shared_ptr<device_image_impl> DeviceImageImpl =
2383 KernelIDs->insert(KernelIDs->end(),
2384 DeviceImageImpl->get_kernel_ids_ptr()->begin(),
2385 DeviceImageImpl->get_kernel_ids_ptr()->end());
2391 const std::lock_guard<std::mutex> SpecConstLock(
2392 DeviceImageImpl->get_spec_const_data_lock());
2396 for (
const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) {
2397 std::vector<device_image_impl::SpecConstDescT> &NewDescEntries =
2398 NewSpecConstMap[SpecConstIt.first];
2399 assert(NewDescEntries.empty() &&
2400 "Specialization constant already exists in the map.");
2401 NewDescEntries.reserve(SpecConstIt.second.size());
2403 SpecConstIt.second) {
2405 NewSpecConstDesc.
BlobOffset += NewSpecConstBlob.size();
2406 NewDescEntries.push_back(std::move(NewSpecConstDesc));
2412 NewSpecConstBlob.insert(NewSpecConstBlob.end(),
2413 DeviceImageImpl->get_spec_const_blob_ref().begin(),
2414 DeviceImageImpl->get_spec_const_blob_ref().end());
2422 std::make_shared<detail::device_image_impl>(
2424 LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob));
2428 return {createSyclObjFromImpl<device_image_plain>(ExecutableImpl)};
2436 const std::vector<device> &Devs,
2440 const std::shared_ptr<device_image_impl> &InputImpl =
2443 const context Context = InputImpl->get_context();
2449 std::string CompileOpts;
2450 std::string LinkOpts;
2456 SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();
2459 auto BuildF = [
this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts,
2460 &InputImpl, SpecConsts] {
2462 const PluginPtr &Plugin = ContextImpl->getPlugin();
2468 if (InputImpl->get_bin_image_ref()->getFormat() !=
2475 "Creating a program from AOT binary for multiple device "
2476 "is not supported");
2480 auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram(
2481 Img, {&Img}, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);
2483 if (!DeviceCodeWasInCache &&
2484 InputImpl->get_bin_image_ref()->supportsSpecConstants())
2487 ProgramPtr ProgramManaged(NativePrg, urProgramRelease);
2494 uint32_t DeviceLibReqMask = 0;
2497 DeviceLibReqMask = getDeviceLibReqMask(Img);
2500 std::vector<ur_program_handle_t> ExtraProgramsToLink;
2501 ProgramPtr BuiltProgram =
2502 build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts,
2504 ExtraProgramsToLink);
2509 std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
2510 NativePrograms.insert({BuiltProgram.get(), &Img});
2513 ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img);
2516 if (!DeviceCodeWasInCache)
2517 PersistentDeviceCodeCache::putItemToDisc(Devs[0], {&Img}, SpecConsts,
2518 CompileOpts + LinkOpts,
2519 BuiltProgram.
get());
2521 return BuiltProgram.release();
2525 auto ResProgram = BuildF();
2528 InputImpl->get_kernel_ids_ptr(), ResProgram,
2529 InputImpl->get_spec_const_data_ref(),
2530 InputImpl->get_spec_const_blob_ref());
2532 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2536 ur_device_handle_t UrDevice =
getSyclObjImpl(Devs[0]).get()->getHandleRef();
2538 std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice);
2542 auto GetCachedBuildF = [&Cache, &CacheKey]() {
2548 assert(BuildResult !=
nullptr &&
"Invalid build result");
2550 ur_program_handle_t ResProgram = BuildResult->Val;
2555 const PluginPtr &Plugin = ContextImpl->getPlugin();
2556 auto CacheOtherDevices = [ResProgram, &Plugin]() {
2557 Plugin->call(urProgramRetain, ResProgram);
2563 for (
size_t Idx = 1; Idx < Devs.size(); ++Idx) {
2564 const ur_device_handle_t UrDeviceAdd =
2568 CacheKey.second = UrDeviceAdd;
2571 assert(BuildResult !=
nullptr &&
"Invalid build result");
2577 Plugin->call(urProgramRetain, ResProgram);
2581 InputImpl->get_kernel_ids_ptr(), ResProgram,
2582 InputImpl->get_spec_const_data_ref(),
2583 InputImpl->get_spec_const_blob_ref());
2585 return createSyclObjFromImpl<device_image_plain>(ExecImpl);
2590 std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *>
2591 ProgramManager::getOrCreateKernel(
const context &Context,
2592 const std::string &KernelName,
2594 ur_program_handle_t Program) {
2602 auto BuildF = [
this, &Program, &KernelName, &Ctx] {
2603 ur_kernel_handle_t
Kernel =
nullptr;
2605 const PluginPtr &Plugin = Ctx->getPlugin();
2606 Plugin->call(urKernelCreate, Program, KernelName.c_str(), &
Kernel);
2609 if (Ctx->getPlatformImpl()->supports_usm()) {
2610 bool EnableAccess =
true;
2611 Plugin->call(urKernelSetExecInfo,
Kernel,
2612 UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS,
sizeof(ur_bool_t),
2613 nullptr, &EnableAccess);
2619 getEliminatedKernelArgMask(Program, KernelName);
2624 auto GetCachedBuildF = [&Cache, &KernelName, Program]() {
2632 auto [
Kernel, ArgMask] = BuildF();
2638 assert(BuildResult !=
nullptr &&
"Invalid build result");
2643 Ctx->getPlugin()->call(urKernelRetain, BuildResult->Val.first);
2645 &(BuildResult->MBuildResultMutex),
2646 BuildResult->Val.second);
2649 ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel(
2650 const std::string &KernelName,
2651 const std::vector<unsigned char> &SpecializationConsts) {
2653 std::cerr <<
">>> ProgramManager::getCachedMaterializedKernel\n"
2654 <<
"KernelName: " << KernelName <<
"\n";
2657 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2658 if (
auto KnownMaterializations = m_MaterializedKernels.find(KernelName);
2659 KnownMaterializations != m_MaterializedKernels.end()) {
2661 std::cerr <<
">>> There are:" << KnownMaterializations->second.size()
2662 <<
" materialized kernels.\n";
2664 KnownMaterializations->second.find(SpecializationConsts);
2665 Kernel != KnownMaterializations->second.end()) {
2667 std::cerr <<
">>> Kernel in the chache\n";
2674 std::cerr <<
">>> Kernel not in the chache\n";
2679 ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel(
2681 const device &Device,
const std::string &KernelName,
2682 const std::vector<unsigned char> &SpecializationConsts) {
2685 std::cerr <<
">>> ProgramManager::getOrCreateMaterializedKernel\n"
2686 <<
"KernelName: " << KernelName <<
"\n";
2688 if (
auto CachedKernel =
2689 getCachedMaterializedKernel(KernelName, SpecializationConsts))
2690 return CachedKernel;
2693 std::cerr <<
">>> Adding the kernel to the cache.\n";
2694 auto Program = createURProgram(Img, Context, Device);
2696 auto &Plugin = DeviceImpl->getPlugin();
2697 ProgramPtr ProgramManaged(Program, urProgramRelease);
2699 std::string CompileOpts;
2700 std::string LinkOpts;
2703 std::vector<ur_program_handle_t> ExtraProgramsToLink;
2706 CompileOpts, LinkOpts, DeviceImpl->getHandleRef(),
2708 ExtraProgramsToLink);
2709 ur_kernel_handle_t UrKernel{
nullptr};
2711 BuildProgram.get(), KernelName.c_str(), &UrKernel);
2713 std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2714 m_MaterializedKernels[KernelName][SpecializationConsts] = UrKernel;
2726 #define __SYCL_ASPECT(ASPECT, ID) \
2727 case aspect::ASPECT: \
2729 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
2732 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
2733 switch (AspectNum) {
2734 #include <sycl/info/aspects.def>
2735 #include <sycl/info/aspects_deprecated.def>
2739 std::to_string(
static_cast<unsigned>(AspectNum)));
2740 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2741 #undef __SYCL_ASPECT_DEPRECATED
2742 #undef __SYCL_ASPECT
2746 template <
typename T>
2747 static std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
2757 namespace matrix_ext = ext::oneapi::experimental::matrix;
2762 std::optional<matrix_ext::matrix_type>
2764 const std::string &MatrixTypeString) {
2765 assert(!MatrixTypeString.empty() &&
2766 "MatrixTypeString type string can't be empty. Check if required "
2767 "template specialization for convertTypeToMatrixTypeString exists.");
2768 std::string_view MatrixTypeStringView = MatrixTypeString;
2769 std::string Prefix(
"matrix_type::");
2770 assert((MatrixTypeStringView.substr(0, Prefix.size()) == Prefix) &&
2771 "MatrixTypeString has incorrect prefix, should be \"matrix_type::\".");
2772 MatrixTypeStringView.remove_prefix(Prefix.size());
2773 if (
"bf16" == MatrixTypeStringView)
2774 return matrix_ext::matrix_type::bf16;
2775 else if (
"fp16" == MatrixTypeStringView)
2776 return matrix_ext::matrix_type::fp16;
2777 else if (
"tf32" == MatrixTypeStringView)
2778 return matrix_ext::matrix_type::tf32;
2779 else if (
"fp32" == MatrixTypeStringView)
2780 return matrix_ext::matrix_type::fp32;
2781 else if (
"fp64" == MatrixTypeStringView)
2782 return matrix_ext::matrix_type::fp64;
2783 else if (
"sint8" == MatrixTypeStringView)
2784 return matrix_ext::matrix_type::sint8;
2785 else if (
"sint16" == MatrixTypeStringView)
2786 return matrix_ext::matrix_type::sint16;
2787 else if (
"sint32" == MatrixTypeStringView)
2788 return matrix_ext::matrix_type::sint32;
2789 else if (
"sint64" == MatrixTypeStringView)
2790 return matrix_ext::matrix_type::sint64;
2791 else if (
"uint8" == MatrixTypeStringView)
2792 return matrix_ext::matrix_type::uint8;
2793 else if (
"uint16" == MatrixTypeStringView)
2794 return matrix_ext::matrix_type::uint16;
2795 else if (
"uint32" == MatrixTypeStringView)
2796 return matrix_ext::matrix_type::uint32;
2797 else if (
"uint64" == MatrixTypeStringView)
2798 return matrix_ext::matrix_type::uint64;
2799 return std::nullopt;
2803 size_t RowsUser,
size_t ColsUser,
2805 size_t MaxRowsRuntime,
size_t MaxColsRuntime,
2806 size_t RowsRuntime,
size_t ColsRuntime) {
2807 std::optional<matrix_ext::matrix_type> MatrixTypeUserOpt =
2809 if (!MatrixTypeUserOpt)
2811 bool IsMatrixTypeSupported = (MatrixTypeUserOpt.value() == MatrixTypeRuntime);
2812 bool IsRowsSupported = ((RowsRuntime != 0) ? (RowsUser == RowsRuntime)
2813 : (RowsUser <= MaxRowsRuntime));
2814 bool IsColsSupported = ((ColsRuntime != 0) ? (ColsUser == ColsRuntime)
2815 : (ColsUser <= MaxColsRuntime));
2816 return IsMatrixTypeSupported && IsRowsSupported && IsColsSupported;
2820 const std::string &JointMatrixProStr,
2821 const std::vector<ext::oneapi::experimental::matrix::combination>
2822 &SupportedMatrixCombinations) {
2823 std::istringstream JointMatrixStrStream(JointMatrixProStr);
2824 std::string SingleJointMatrix;
2828 while (std::getline(JointMatrixStrStream, SingleJointMatrix,
';')) {
2829 std::istringstream SingleJointMatrixStrStream(SingleJointMatrix);
2830 std::vector<std::string> JointMatrixVec;
2833 while (std::getline(SingleJointMatrixStrStream, Item,
',')) {
2834 JointMatrixVec.push_back(Item);
2837 assert(JointMatrixVec.size() == 4 &&
2838 "Property set is corrupted, it must have 4 elements.");
2840 const std::string &MatrixTypeUser = JointMatrixVec[0];
2841 const std::string &UseStrUser = JointMatrixVec[1];
2842 size_t RowsUser, ColsUser = 0;
2844 RowsUser = std::stoi(JointMatrixVec[2]);
2845 ColsUser = std::stoi(JointMatrixVec[3]);
2846 }
catch (std::logic_error &) {
2852 bool IsMatrixCompatible =
false;
2854 for (
const auto &Combination : SupportedMatrixCombinations) {
2855 std::optional<ext::oneapi::experimental::matrix::use> Use =
2857 assert(Use &&
"Property set has empty matrix::use value.");
2858 switch (Use.value()) {
2861 MatrixTypeUser, RowsUser, ColsUser, Combination.atype,
2862 Combination.max_msize, Combination.max_ksize, Combination.msize,
2867 MatrixTypeUser, RowsUser, ColsUser, Combination.btype,
2868 Combination.max_ksize, Combination.max_nsize, Combination.ksize,
2871 case matrix_ext::use::accumulator: {
2873 MatrixTypeUser, RowsUser, ColsUser, Combination.ctype,
2874 Combination.max_msize, Combination.max_nsize, Combination.msize,
2877 MatrixTypeUser, RowsUser, ColsUser, Combination.dtype,
2878 Combination.max_msize, Combination.max_nsize, Combination.msize,
2885 if (IsMatrixCompatible)
2889 if (!IsMatrixCompatible)
2891 "joint_matrix with parameters " + MatrixTypeUser +
2893 ", Rows=" + std::to_string(RowsUser) +
2894 ", Cols=" + std::to_string(ColsUser) +
2895 " is not supported on this device");
2897 return std::nullopt;
2901 const std::string &JointMatrixProStr,
2902 const std::vector<ext::oneapi::experimental::matrix::combination>
2903 &SupportedMatrixCombinations) {
2904 std::istringstream JointMatrixMadStrStream(JointMatrixProStr);
2905 std::string SingleJointMatrixMad;
2909 while (std::getline(JointMatrixMadStrStream, SingleJointMatrixMad,
';')) {
2910 std::istringstream SingleJointMatrixMadStrStream(SingleJointMatrixMad);
2911 std::vector<std::string> JointMatrixMadVec;
2914 while (std::getline(SingleJointMatrixMadStrStream, Item,
',')) {
2915 JointMatrixMadVec.push_back(Item);
2918 assert(JointMatrixMadVec.size() == 7 &&
2919 "Property set is corrupted, it must have 7 elements.");
2921 const std::string &MatrixTypeAStrUser = JointMatrixMadVec[0];
2922 const std::string &MatrixTypeBStrUser = JointMatrixMadVec[1];
2923 const std::string &MatrixTypeCStrUser = JointMatrixMadVec[2];
2924 const std::string &MatrixTypeDStrUser = JointMatrixMadVec[3];
2925 size_t MSizeUser, KSizeUser, NSizeUser = 0;
2927 MSizeUser = std::stoi(JointMatrixMadVec[4]);
2928 KSizeUser = std::stoi(JointMatrixMadVec[5]);
2929 NSizeUser = std::stoi(JointMatrixMadVec[6]);
2930 }
catch (std::logic_error &) {
2936 std::optional<matrix_ext::matrix_type> MatrixTypeAUserOpt =
2938 std::optional<matrix_ext::matrix_type> MatrixTypeBUserOpt =
2940 std::optional<matrix_ext::matrix_type> MatrixTypeCUserOpt =
2942 std::optional<matrix_ext::matrix_type> MatrixTypeDUserOpt =
2945 bool IsMatrixMadCompatible =
false;
2947 for (
const auto &Combination : SupportedMatrixCombinations) {
2948 if (!MatrixTypeAUserOpt || !MatrixTypeBUserOpt || !MatrixTypeCUserOpt ||
2949 !MatrixTypeDUserOpt)
2952 bool IsMatrixTypeACompatible =
2953 (MatrixTypeAUserOpt.value() == Combination.atype);
2954 bool IsMatrixTypeBCompatible =
2955 (MatrixTypeBUserOpt.value() == Combination.btype);
2956 bool IsMatrixTypeCCompatible =
2957 (MatrixTypeCUserOpt.value() == Combination.ctype);
2958 bool IsMatrixTypeDCompatible =
2959 (MatrixTypeDUserOpt.value() == Combination.dtype);
2960 bool IsMSizeCompatible =
2961 ((Combination.msize != 0) ? (MSizeUser == Combination.msize)
2962 : (MSizeUser <= Combination.max_msize));
2963 bool IsKSizeCompatible =
2964 ((Combination.ksize != 0) ? (KSizeUser == Combination.ksize)
2965 : (KSizeUser <= Combination.max_ksize));
2966 bool IsNSizeCompatible =
2967 ((Combination.nsize != 0) ? (NSizeUser == Combination.nsize)
2968 : (NSizeUser <= Combination.max_nsize));
2970 IsMatrixMadCompatible =
2971 IsMatrixTypeACompatible && IsMatrixTypeBCompatible &&
2972 IsMatrixTypeCCompatible && IsMatrixTypeDCompatible &&
2973 IsMSizeCompatible && IsKSizeCompatible && IsNSizeCompatible;
2976 if (IsMatrixMadCompatible)
2980 if (!IsMatrixMadCompatible)
2983 "joint_matrix_mad function with parameters atype=" +
2984 MatrixTypeAStrUser +
", btype=" + MatrixTypeBStrUser +
2985 ", ctype=" + MatrixTypeCStrUser +
", dtype=" +
2986 MatrixTypeDStrUser +
", M=" + std::to_string(MSizeUser) +
", K=" +
2987 std::to_string(KSizeUser) +
", N=" + std::to_string(NSizeUser) +
2988 " is not supported on this "
2991 return std::nullopt;
2994 std::optional<sycl::exception>
2998 auto getPropIt = [&Img](
const std::string &PropName) {
3001 PropRange.begin(), PropRange.end(),
3003 return (*Prop)->Name == PropName;
3005 return (PropIt == PropRange.end())
3011 auto AspectsPropIt = getPropIt(
"aspects");
3012 auto JointMatrixPropIt = getPropIt(
"joint_matrix");
3013 auto JointMatrixMadPropIt = getPropIt(
"joint_matrix_mad");
3014 auto ReqdWGSizeUint32TPropIt = getPropIt(
"reqd_work_group_size");
3015 auto ReqdWGSizeUint64TPropIt = getPropIt(
"reqd_work_group_size_uint64_t");
3016 auto ReqdSubGroupSizePropIt = getPropIt(
"reqd_sub_group_size");
3017 auto WorkGroupNumDim = getPropIt(
"work_group_num_dim");
3020 if (AspectsPropIt) {
3025 while (!Aspects.
empty()) {
3026 aspect Aspect = Aspects.
consume<aspect>();
3027 if (!Dev.
has(Aspect))
3030 " is not supported on the device");
3034 if (JointMatrixPropIt) {
3035 std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
3037 ext::oneapi::experimental::info::device::matrix_combinations>();
3039 if (Combinations.empty())
3041 "no matrix hardware on the target device, "
3042 "joint_matrix is not supported");
3048 std::string JointMatrixByteArrayToStr;
3049 while (!JointMatrixByteArray.
empty()) {
3050 JointMatrixByteArrayToStr += JointMatrixByteArray.
consume<
char>();
3052 std::optional<sycl::exception> Result =
3055 return Result.value();
3058 if (JointMatrixMadPropIt) {
3059 std::vector<ext::oneapi::experimental::matrix::combination> Combinations =
3061 ext::oneapi::experimental::info::device::matrix_combinations>();
3063 if (Combinations.empty())
3065 "no matrix hardware on the target device, "
3066 "joint_matrix_mad is not supported");
3072 std::string JointMatrixMadByteArrayToStr;
3073 while (!JointMatrixMadByteArray.
empty()) {
3074 JointMatrixMadByteArrayToStr += JointMatrixMadByteArray.
consume<
char>();
3077 JointMatrixMadByteArrayToStr, Combinations);
3079 return Result.value();
3083 if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
3090 bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value();
3091 auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
3096 uint64_t ReqdWGSizeAllDimsTotal = 1;
3097 std::vector<uint64_t> ReqdWGSizeVec;
3099 while (!ReqdWGSize.
empty()) {
3100 uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.
consume<uint64_t>()
3101 : ReqdWGSize.
consume<uint32_t>();
3104 ReqdWGSizeAllDimsTotal = *res;
3107 sycl::errc::kernel_not_supported,
3108 "Required work-group size is not supported"
3109 " (total number of work-items requested can't fit into size_t)");
3110 ReqdWGSizeVec.push_back(SingleDimSize);
3114 size_t UserProvidedNumDims = 0;
3115 if (WorkGroupNumDim) {
3119 UserProvidedNumDims =
3122 for (
unsigned i = UserProvidedNumDims; i < 3; ++i)
3123 assert(ReqdWGSizeVec[i] == 1 &&
3124 "Incorrect padding in required work-group size metadata.");
3127 UserProvidedNumDims = Dims;
3130 if (NDRDesc.
Dims != 0 && NDRDesc.
Dims != UserProvidedNumDims)
3133 "The local size dimension of submitted nd_range doesn't match the "
3134 "required work-group size dimension");
3139 if (ReqdWGSizeAllDimsTotal >
3140 Dev.
get_info<info::device::max_work_group_size>())
3142 "Required work-group size " +
3143 std::to_string(ReqdWGSizeAllDimsTotal) +
3144 " is not supported on the device");
3147 std::variant<id<1>,
id<2>,
id<3>> MaxWorkItemSizesVariant;
3149 MaxWorkItemSizesVariant =
3152 MaxWorkItemSizesVariant =
3155 MaxWorkItemSizesVariant =
3157 for (
int i = 0; i < Dims; i++) {
3162 if (ReqdWGSizeVec[i] >
3165 "Required work-group size " +
3166 std::to_string(ReqdWGSizeVec[i]) +
3167 " is not supported");
3168 }
else if (Dims == 2) {
3169 if (ReqdWGSizeVec[i] >
3172 "Required work-group size " +
3173 std::to_string(ReqdWGSizeVec[i]) +
3174 " is not supported");
3176 if (ReqdWGSizeVec[i] >
3179 "Required work-group size " +
3180 std::to_string(ReqdWGSizeVec[i]) +
3181 " is not supported");
3186 if (ReqdSubGroupSizePropIt) {
3187 auto ReqdSubGroupSize =
3189 auto SupportedSubGroupSizes = Dev.
get_info<info::device::sub_group_sizes>();
3196 SupportedSubGroupSizes.cend(),
3197 [=](
auto s) { return s == ReqdSubGroupSize; }))
3200 std::to_string(ReqdSubGroupSize) +
3201 " is not supported on the device");
3212 sycl::detail::ProgramManager::getInstance().addImages(desc);
The context class represents a SYCL context on which kernel functions may be executed.
std::vector< device > get_devices() const
Gets devices associated with this SYCL context.
platform get_platform() const
Gets platform associated with this SYCL context.
void dropBytes(std::size_t Bytes)
ByteArray asByteArray() const
const char * asCString() const
uint32_t asUint32() const
ProgramManager & getProgramManager()
static GlobalHandler & instance()
bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey, ur_program_handle_t Program)
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(ur_program_handle_t Program, const std::string &KernelName)
std::pair< ur_kernel_handle_t, const KernelArgMask * > KernelArgMaskPairT
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
static constexpr const char * DirSep
static std::string getCurrentDSODir()
Returns an absolute path to a directory where the object was found.
static std::vector< std::vector< char > > getItemFromDisc(const device &Device, const std::vector< const RTDeviceBinaryImage * > &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString)
static void putItemToDisc(const device &Device, const std::vector< const RTDeviceBinaryImage * > &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, const ur_program_handle_t &NativePrg)
ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void debugPrintBinaryImages() const
std::tuple< ur_kernel_handle_t, std::mutex *, const KernelArgMask *, ur_program_handle_t > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
void addImages(sycl_device_binaries DeviceImages)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
const KernelArgMask * getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context)
std::pair< ur_program_handle_t, bool > getOrCreateURProgram(const RTDeviceBinaryImage &Img, const std::vector< const RTDeviceBinaryImage * > &AllImages, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a UR program using either a cached device code binary if present in the persistent cache or f...
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
bool kernelUsesAssert(const std::string &KernelName) const
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context)
bool hasCompatibleImage(const device &Dev)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
ConstIterator begin() const
virtual void dump(std::ostream &Out) const
const sycl_device_binary_struct & getRawData() const
const char * getLinkOptions() const
ur::DeviceBinaryType getFormat() const
Returns the format of the binary image.
const std::vector< ur_program_metadata_t > & getProgramMetadataUR() const
std::uintptr_t getImageID() const
bool supportsSpecConstants() const
sycl_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
sycl_device_binary get() const
const PropertyRange & getDeviceRequirements() const
const char * getCompileOptions() const
virtual void print() const
const PropertyRange & getDeviceLibReqMask() const
static const char * get()
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect)
Indicates if the SYCL device has the given feature.
A unique identifier of an item in an index space.
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA
#define __SYCL_DEVICE_BINARY_TARGET_AMDGCN
#define __SYCL_DEVICE_BINARY_TARGET_NVPTX64
PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device.
sycl_device_binary_type
Types of device binary.
@ SYCL_DEVICE_BINARY_TYPE_SPIRV
@ SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE
@ SYCL_DEVICE_BINARY_TYPE_NONE
@ SYCL_DEVICE_BINARY_TYPE_NATIVE
#define __SYCL_DEVICE_BINARY_TARGET_UNKNOWN
Target identification strings.
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
#define __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64
Device-specific binary images produced from SPIR-V 64-bit <-> various "spir64_*" triples for specific...
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN
#define __SYCL_DEVICE_BINARY_TARGET_SPIRV32
SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
sycl_device_binary_type getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize)
Tries to determine the device binary image foramat.
ur_program_metadata_t mapDeviceBinaryPropertyToProgramMetadata(const sycl_device_binary_property &DeviceBinaryProperty)
::sycl_device_binary_type DeviceBinaryType
static void applyLinkOptionsFromEnvironment(std::string &LinkOpts)
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image, bool JITCompilationIsRequired)
static constexpr int DbgProgMgr
std::shared_ptr< device_image_impl > DeviceImageImplPtr
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, ur_program_handle_t &Prog)
static void applyOptionsFromEnvironment(std::string &CompileOpts, std::string &LinkOpts)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::optional< matrix_ext::matrix_type > convertMatrixTypeStringMatrixTypeEnumValue(const std::string &MatrixTypeString)
static const char * getDeviceLibFilename(DeviceLibExt Extension, bool Native)
static bool isDeviceBinaryTypeSupported(const context &C, ur::DeviceBinaryType Format)
static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, const char *PropName)
std::vector< bool > KernelArgMask
static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask)
@ cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_cstring
@ cl_intel_devicelib_imf_bf16
@ cl_intel_devicelib_math
@ cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_assert
@ cl_intel_devicelib_imf_fp64
@ cl_intel_devicelib_bfloat16
@ cl_intel_devicelib_complex
static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, const char *PropName)
static const char * getUrDeviceTarget(const char *URDeviceTarget)
static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, ur_device_handle_t Device, bool UseNativeLib)
static void appendLinkOptionsFromImage(std::string &LinkOpts, const RTDeviceBinaryImage &Img)
static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen)
static constexpr uint32_t ITTSpecConstId
KernelArgMask createKernelArgMask(const ByteArray &Bytes)
static std::string getAspectNameStr(sycl::aspect AspectNum)
static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
static void appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage)
static void applyCompileOptionsFromEnvironment(std::string &CompileOpts)
static const std::map< DeviceLibExt, const char * > DeviceLibExtensionStrs
std::optional< sycl::exception > checkDevSupportJointMatrix(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
constexpr std::optional< ext::oneapi::experimental::matrix::use > convertMatrixUseStringToEnum(const char *UseString)
static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devs, const PluginPtr &)
RTDeviceBinaryImage * getBinImageFromMultiMap(const std::unordered_multimap< StorageKey, RTDeviceBinaryImage * > &ImagesSet, const StorageKey &Key, const context &Context, const device &Device)
static void setSpecializationConstants(const std::shared_ptr< device_image_impl > &InputImpl, ur_program_handle_t Prog, const PluginPtr &Plugin)
std::vector< std::string > split_string(std::string_view str, char delimeter)
std::shared_ptr< plugin > PluginPtr
static ur_result_t doCompile(const PluginPtr &Plugin, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, const char *Opts)
std::shared_ptr< device_impl > DeviceImplPtr
std::optional< sycl::exception > checkDevSupportJointMatrixMad(const std::string &JointMatrixProStr, const std::vector< ext::oneapi::experimental::matrix::combination > &SupportedMatrixCombinations)
static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, const std::vector< device > &Devices, const PluginPtr &Plugin)
static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev)
bool isMatrixSupportedByHW(const std::string &MatrixTypeStrUser, size_t RowsUser, size_t ColsUser, matrix_ext::matrix_type MatrixTypeRuntime, size_t MaxRowsRuntime, size_t MaxColsRuntime, size_t RowsRuntime, size_t ColsRuntime)
constexpr tuple< Ts... > make_tuple(Ts... Args)
static std::enable_if_t< std::is_unsigned_v< T >, std::optional< T > > multiply_with_overflow_check(T x, T y)
static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts)
static const char * getDeviceLibExtensionStr(DeviceLibExt Extension)
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
static constexpr char UseSpvEnv("SYCL_USE_KERNEL_SPV")
static void appendCompileEnvironmentVariablesThatAppend(std::string &CompileOpts)
static void emitBuiltProgramInfo(const ur_program_handle_t &Prog, const ContextImplPtr &Context)
Emits information about built programs if the appropriate contitions are met, namely when SYCL_RT_WAR...
std::vector< unsigned char > SerializedObj
exception set_ur_error(exception &&e, int32_t ur_err)
static const std::map< DeviceLibExt, std::pair< const char *, const char * > > DeviceLibNames
static std::vector< ur_program_handle_t > getDeviceLibPrograms(const ContextImplPtr Context, const ur_device_handle_t &Device, uint32_t DeviceLibReqMask)
static ur_program_handle_t createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, const std::vector< ur_program_metadata_t > Metadata)
static const char * getFormatStr(ur::DeviceBinaryType Format)
static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog, const PluginPtr &Plugin)
This function enables ITT annotations in SPIR-V module by setting a specialization constant if INTEL_...
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
void __sycl_unregister_lib(sycl_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
void __sycl_register_lib(sycl_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
This struct is a record of all the device code that may be offloaded.
uint16_t NumDeviceBinaries
Number of device binaries in this descriptor.
sycl_device_binary DeviceBinaries
Device binaries data.
This struct is a record of the device binary information.
sycl_offload_entry EntriesEnd
sycl_offload_entry EntriesBegin
the offload entry table
const unsigned char * BinaryStart
Pointer to the target code start.
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
const unsigned char * BinaryEnd
Pointer to the target code end.
C++ utilities for Unified Runtime integration.