DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
13 #include <sycl/detail/common.hpp>
15 #include <sycl/detail/export.hpp>
16 #include <sycl/detail/os_util.hpp>
17 #include <sycl/detail/pi.hpp>
18 #include <sycl/detail/util.hpp>
19 #include <sycl/device.hpp>
20 #include <sycl/kernel_bundle.hpp>
21 #include <sycl/stl.hpp>
22 
23 #include <cstdint>
24 #include <map>
25 #include <memory>
26 #include <set>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <vector>
30 
31 // +++ Entry points referenced by the offload wrapper object {
32 
35 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
36 
40 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
41 
42 // +++ }
43 
44 namespace sycl {
46 class context;
47 namespace detail {
48 
49 bool doesDevSupportDeviceRequirements(const device &Dev,
50  const RTDeviceBinaryImage &BinImages);
51 
52 // This value must be the same as in libdevice/device_itt.h.
53 // See sycl/doc/design/ITTAnnotations.md for more info.
54 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
55 
56 class context_impl;
57 using ContextImplPtr = std::shared_ptr<context_impl>;
58 class device_impl;
59 using DeviceImplPtr = std::shared_ptr<device_impl>;
60 class program_impl;
61 class queue_impl;
62 class event_impl;
63 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
64 // If any update is made here, need to sync with DeviceLibExt definition
65 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
66 enum class DeviceLibExt : std::uint32_t {
77 };
78 
79 // Provides single loading and building OpenCL programs with unique contexts
80 // that is necessary for no interoperability cases with lambda.
82 public:
83  // TODO use a custom dynamic bitset instead to make initialization simpler.
84  using KernelArgMask = std::vector<bool>;
85 
86  // Returns the single instance of the program manager for the entire
87  // process. Can only be called after staticInit is done.
88  static ProgramManager &getInstance();
89  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M,
90  const std::string &KernelName,
91  const context &Context,
92  const device &Device,
93  bool JITCompilationIsRequired = false);
94  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId,
95  const context &Context,
96  const device &Device,
97  bool JITCompilationIsRequired = false);
98  RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img,
99  const context &Context, const device &Device);
118  std::pair<RT::PiProgram, bool>
119  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
120  const device &Device,
121  const std::string &CompileAndLinkOptions,
122  SerializedObj SpecConsts);
136  RT::PiProgram getBuiltPIProgram(OSModuleHandle M,
137  const ContextImplPtr &ContextImpl,
138  const DeviceImplPtr &DeviceImpl,
139  const std::string &KernelName,
140  const program_impl *Prg = nullptr,
141  bool JITCompilationIsRequired = false);
142 
143  RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context,
144  const device &Device,
145  const std::string &KernelName,
146  const property_list &PropList,
147  bool JITCompilationIsRequired = false);
148 
149  std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
150  getOrCreateKernel(OSModuleHandle M, const ContextImplPtr &ContextImpl,
151  const DeviceImplPtr &DeviceImpl,
152  const std::string &KernelName, const program_impl *Prg);
153 
154  RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel,
155  const ContextImplPtr Context);
156 
157  void addImages(pi_device_binaries DeviceImages);
158  void debugPrintBinaryImages() const;
159  static std::string getProgramBuildLog(const RT::PiProgram &Program,
160  const ContextImplPtr Context);
161 
173  void flushSpecConstants(const program_impl &Prg,
174  pi::PiProgram NativePrg = nullptr,
175  const RTDeviceBinaryImage *Img = nullptr);
176  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
177 
184  KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
185  pi::PiProgram NativePrg,
186  const std::string &KernelName);
187 
188  // The function returns the unique SYCL kernel identifier associated with a
189  // kernel name.
190  kernel_id getSYCLKernelID(const std::string &KernelName);
191 
192  // The function returns a vector containing all unique SYCL kernel identifiers
193  // in SYCL device images.
194  std::vector<kernel_id> getAllSYCLKernelIDs();
195 
196  // The function returns the unique SYCL kernel identifier associated with a
197  // built-in kernel name.
198  kernel_id getBuiltInKernelID(const std::string &KernelName);
199 
200  // The function inserts or initializes a device_global entry into the
201  // device_global map.
202  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
203  const char *UniqueId);
204 
205  // Returns true if any available image is compatible with the device Dev.
206  bool hasCompatibleImage(const device &Dev);
207 
208  // The function gets a device_global entry identified by the pointer to the
209  // device_global object from the device_global map.
210  DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr);
211 
212  // The function gets multiple device_global entries identified by their unique
213  // IDs from the device_global map.
214  std::vector<DeviceGlobalMapEntry *>
215  getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
216  bool ExcludeDeviceImageScopeDecorated = false);
217 
219  getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage,
220  const context &Ctx, const device &Dev);
221 
222  // The function returns a vector of SYCL device images that are compiled with
223  // the required state and at least one device from the passed list of devices.
224  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
225  const context &Ctx, const std::vector<device> &Devs,
226  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
227 
228  // Brind images in the passed vector to the required state. Does it inplace
229  void
230  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
231  bundle_state TargetState);
232 
233  // The function returns a vector of SYCL device images in required state,
234  // which are compatible with at least one of the device from Devs.
235  std::vector<device_image_plain>
236  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
237  bundle_state State);
238 
239  // The function returns a vector of SYCL device images, for which Selector
240  // callable returns true, in required state, which are compatible with at
241  // least one of the device from Devs.
242  std::vector<device_image_plain>
243  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
244  const DevImgSelectorImpl &Selector,
245  bundle_state TargetState);
246 
247  // The function returns a vector of SYCL device images which represent at
248  // least one kernel from kernel ids vector in required state, which are
249  // compatible with at least one of the device from Devs.
250  std::vector<device_image_plain>
251  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
252  const std::vector<kernel_id> &KernelIDs,
253  bundle_state TargetState);
254 
255  // Produces new device image by convering input device image to the object
256  // state
257  device_image_plain compile(const device_image_plain &DeviceImage,
258  const std::vector<device> &Devs,
259  const property_list &PropList);
260 
261  // Produces set of device images by convering input device images to object
262  // the executable state
263  std::vector<device_image_plain>
264  link(const std::vector<device_image_plain> &DeviceImages,
265  const std::vector<device> &Devs, const property_list &PropList);
266 
267  // Produces new device image by converting input device image to the
268  // executable state
269  device_image_plain build(const device_image_plain &DeviceImage,
270  const std::vector<device> &Devs,
271  const property_list &PropList);
272 
273  std::pair<RT::PiKernel, std::mutex *>
274  getOrCreateKernel(const context &Context, const std::string &KernelName,
275  const property_list &PropList, RT::PiProgram Program);
276 
277  ProgramManager();
278  ~ProgramManager() = default;
279 
280  bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const;
281 
282  std::set<RTDeviceBinaryImage *>
283  getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
284 
285 private:
286  ProgramManager(ProgramManager const &) = delete;
287  ProgramManager &operator=(ProgramManager const &) = delete;
288 
289  using ProgramPtr = std::unique_ptr<remove_pointer_t<RT::PiProgram>,
290  decltype(&::piProgramRelease)>;
291  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
292  const std::string &CompileOptions,
293  const std::string &LinkOptions, const RT::PiDevice &Device,
294  uint32_t DeviceLibReqMask);
296  KernelSetId getNextKernelSetId() const;
299  KernelSetId getKernelSetId(OSModuleHandle M,
300  const std::string &KernelName) const;
302  void dumpImage(const RTDeviceBinaryImage &Img, KernelSetId KSId,
303  uint32_t SequenceID = 0) const;
304 
306  void cacheKernelUsesAssertInfo(OSModuleHandle M, RTDeviceBinaryImage &Img);
307 
321 
322  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
323 
328  std::unordered_map<KernelSetId,
329  std::unique_ptr<std::vector<RTDeviceBinaryImageUPtr>>>
330  m_DeviceImages;
331 
332  using StrToKSIdMap = std::unordered_map<std::string, KernelSetId>;
336  std::unordered_map<OSModuleHandle, StrToKSIdMap> m_KernelSets;
337 
341  std::unordered_map<OSModuleHandle, KernelSetId> m_OSModuleKernelSets;
342 
347  //
348  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
349 
350  // Maps KernelIDs to device binary images. There can be more than one image
351  // in case of SPIRV + AOT.
353  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
354  m_KernelIDs2BinImage;
355 
356  // Maps device binary image to a vector of kernel ids in this image.
357  // Using shared_ptr to avoid expensive copy of the vector.
358  // The vector is initialized in addImages function and is supposed to be
359  // immutable afterwards.
361  std::unordered_map<RTDeviceBinaryImage *,
362  std::shared_ptr<std::vector<kernel_id>>>
363  m_BinImg2KernelIDs;
364 
369  std::mutex m_KernelIDsMutex;
370 
377  std::unordered_set<std::string> m_ServiceKernels;
378 
380  // from kernel bundles.
382  std::unordered_set<std::string> m_ExportedSymbols;
383 
386  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
387 
389  std::mutex m_BuiltInKernelIDsMutex;
390 
391  // Keeps track of pi_program to image correspondence. Needed for:
392  // - knowing which specialization constants are used in the program and
393  // injecting their current values before compiling the SPIR-V; the binary
394  // image object has info about all spec constants used in the module
395  // - finding kernel argument masks for kernels associated with each
396  // pi_program
397  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
398  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
399  // lifetime matches program manager's one.
400  // NOTE: keys in the map can be invalid (reference count went to zero and
401  // the underlying program disposed of), so the map can't be used in any way
402  // other than binary image lookup with known live PiProgram as the key.
403  // NOTE: access is synchronized via the MNativeProgramsMutex
404  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
405 
407  std::mutex MNativeProgramsMutex;
408 
409  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
412  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
413  m_EliminatedKernelArgMasks;
414 
416  bool m_UseSpvFile = false;
417 
418  using KernelNameWithOSModule = std::pair<std::string, OSModuleHandle>;
419  std::set<KernelNameWithOSModule> m_KernelUsesAssert;
420 
421  // Maps between device_global identifiers and associated information.
422  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
423  m_DeviceGlobals;
424  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
425 
427  std::mutex m_DeviceGlobalsMutex;
428 };
429 } // namespace detail
430 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
431 } // namespace sycl
sycl::_V1::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:723
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:24
sycl::_V1::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:641
sycl::_V1::detail::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex
@ cl_intel_devicelib_complex
sycl::_V1::detail::doesDevSupportDeviceRequirements
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
Definition: program_manager.cpp:2291
stl.hpp
device.hpp
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2387
sycl::_V1::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:676
sycl::_V1::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:59
device_global_map.hpp
sycl::_V1::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:124
os_util.hpp
sycl::_V1::detail::DeviceGlobalMapEntry
Definition: device_global_map_entry.hpp:81
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:939
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf
@ cl_intel_devicelib_imf
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf_bf16
@ cl_intel_devicelib_imf_bf16
pi.hpp
device_global_map_entry.hpp
export.hpp
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1355
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math
@ cl_intel_devicelib_math
sycl::_V1::detail::event_impl
Definition: event_impl.hpp:36
kernel_bundle.hpp
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_bfloat16
@ cl_intel_devicelib_bfloat16
sycl::_V1::ext::oneapi::experimental::operator=
annotated_arg & operator=(annotated_arg &)=default
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_assert
@ cl_intel_devicelib_assert
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2382
sycl::_V1::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:44
sycl::_V1::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:130
common.hpp
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
sycl::_V1::detail::device_impl
Definition: device_impl.hpp:36
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:760
sycl::_V1::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf_fp64
@ cl_intel_devicelib_imf_fp64
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_complex_fp64
sycl::_V1::detail::pi::PiKernel
::pi_kernel PiKernel
Definition: pi.hpp:131
sycl::_V1::detail::queue_impl
Definition: queue_impl.hpp:59
sycl::_V1::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:443
sycl::_V1::detail::device_image_plain
Definition: kernel_bundle.hpp:76
sycl::_V1::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:84
sycl::_V1::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:66
device_binary_image.hpp
sycl::_V1::detail::program_impl
Definition: program_impl.hpp:38
util.hpp
sycl::_V1::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:68
sycl::_V1::detail::ProgramManager
Definition: program_manager.hpp:81
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_math_fp64
sycl::_V1::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
sycl::_V1::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:54
spec_constant_impl.hpp
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_cstring
@ cl_intel_devicelib_cstring
sycl::_V1::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:82
sycl::_V1::detail::context_impl
Definition: context_impl.hpp:33
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:41
sycl::_V1::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:479