DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/export.hpp>
15 #include <sycl/detail/os_util.hpp>
16 #include <sycl/detail/pi.hpp>
17 #include <sycl/detail/util.hpp>
18 #include <sycl/device.hpp>
19 #include <sycl/kernel_bundle.hpp>
20 #include <sycl/stl.hpp>
21 
22 #include <cstdint>
23 #include <map>
24 #include <memory>
25 #include <set>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <vector>
29 
30 // +++ Entry points referenced by the offload wrapper object {
31 
// Registration entry point referenced by the offload wrapper object.
// Executed as a part of current module's (.exe, .dll) static initialization.
// desc: the device binaries descriptor for the module; presumably refers to
// the pi_device_binaries_struct record of all device code that may be
// offloaded -- TODO confirm against pi.h.
34 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
35 
// Unregistration entry point referenced by the offload wrapper object.
// Executed as a part of current module's (.exe, .dll) static
// de-initialization.
// desc: the device binaries descriptor for the module; presumably the same
// descriptor that was passed to __sycl_register_lib -- TODO confirm.
39 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
40 
41 // +++ }
42 
43 namespace sycl {
45 class context;
46 namespace detail {
47 
48 // This value must be the same as in libdevice/device_itt.h.
49 // See sycl/doc/design/ITTAnnotations.md for more info.
50 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
51 
52 class context_impl;
53 using ContextImplPtr = std::shared_ptr<context_impl>;
54 class device_impl;
55 using DeviceImplPtr = std::shared_ptr<device_impl>;
56 class program_impl;
57 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
58 // If any update is made here, need to sync with DeviceLibExt definition
59 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
60 enum class DeviceLibExt : std::uint32_t {
69 };
70 
71 // Provides single loading and building of OpenCL programs with unique
72 // contexts, which is necessary for non-interoperability cases with lambdas.
74 public:
75  // TODO use a custom dynamic bitset instead to make initialization simpler.
76  using KernelArgMask = std::vector<bool>;
77 
78  // Returns the single instance of the program manager for the entire
79  // process. Can only be called after staticInit is done.
80  static ProgramManager &getInstance();
81  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M,
82  const std::string &KernelName,
83  const context &Context,
84  const device &Device,
85  bool JITCompilationIsRequired = false);
86  RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img,
87  const context &Context, const device &Device);
106  std::pair<RT::PiProgram, bool>
107  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
108  const device &Device,
109  const std::string &CompileAndLinkOptions,
110  SerializedObj SpecConsts);
124  RT::PiProgram getBuiltPIProgram(OSModuleHandle M,
125  const ContextImplPtr &ContextImpl,
126  const DeviceImplPtr &DeviceImpl,
127  const std::string &KernelName,
128  const program_impl *Prg = nullptr,
129  bool JITCompilationIsRequired = false);
130 
131  RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context,
132  const device &Device,
133  const std::string &KernelName,
134  const property_list &PropList,
135  bool JITCompilationIsRequired = false);
136 
137  std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
138  getOrCreateKernel(OSModuleHandle M, const ContextImplPtr &ContextImpl,
139  const DeviceImplPtr &DeviceImpl,
140  const std::string &KernelName, const program_impl *Prg);
141 
142  RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel,
143  const ContextImplPtr Context);
144 
145  void addImages(pi_device_binaries DeviceImages);
146  void debugPrintBinaryImages() const;
147  static std::string getProgramBuildLog(const RT::PiProgram &Program,
148  const ContextImplPtr Context);
149 
161  void flushSpecConstants(const program_impl &Prg,
162  pi::PiProgram NativePrg = nullptr,
163  const RTDeviceBinaryImage *Img = nullptr);
164  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
165 
172  KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
173  pi::PiProgram NativePrg,
174  const std::string &KernelName);
175 
176  // The function returns the unique SYCL kernel identifier associated with a
177  // kernel name.
178  kernel_id getSYCLKernelID(const std::string &KernelName);
179 
180  // The function returns a vector containing all unique SYCL kernel identifiers
181  // in SYCL device images.
182  std::vector<kernel_id> getAllSYCLKernelIDs();
183 
184  // The function returns the unique SYCL kernel identifier associated with a
185  // built-in kernel name.
186  kernel_id getBuiltInKernelID(const std::string &KernelName);
187 
188  // The function inserts or initializes a device_global entry into the
189  // device_global map.
190  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
191  const char *UniqueId);
192 
193  // Returns true if any available image is compatible with the device Dev.
194  bool hasCompatibleImage(const device &Dev);
195 
196  // The function returns a vector of SYCL device images that are compiled with
197  // the required state and at least one device from the passed list of devices.
198  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
199  const context &Ctx, const std::vector<device> &Devs,
200  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
201 
202  // Brings the images in the passed vector to the required state, in place.
203  void
204  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
205  bundle_state TargetState);
206 
207  // The function returns a vector of SYCL device images in the required state,
208  // which are compatible with at least one of the devices from Devs.
209  std::vector<device_image_plain>
210  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
211  bundle_state State);
212 
213  // The function returns a vector of SYCL device images in the required state
214  // for which the Selector callable returns true and which are compatible
215  // with at least one of the devices from Devs.
216  std::vector<device_image_plain>
217  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
218  const DevImgSelectorImpl &Selector,
219  bundle_state TargetState);
220 
221  // The function returns a vector of SYCL device images in the required state
222  // which represent at least one kernel from the KernelIDs vector and which
223  // are compatible with at least one of the devices from Devs.
224  std::vector<device_image_plain>
225  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
226  const std::vector<kernel_id> &KernelIDs,
227  bundle_state TargetState);
228 
229  // Produces a new device image by converting the input device image to the
230  // object state
231  device_image_plain compile(const device_image_plain &DeviceImage,
232  const std::vector<device> &Devs,
233  const property_list &PropList);
234 
235  // Produces a set of device images by converting the input device images to
236  // the executable state
237  std::vector<device_image_plain>
238  link(const std::vector<device_image_plain> &DeviceImages,
239  const std::vector<device> &Devs, const property_list &PropList);
240 
241  // Produces new device image by converting input device image to the
242  // executable state
243  device_image_plain build(const device_image_plain &DeviceImage,
244  const std::vector<device> &Devs,
245  const property_list &PropList);
246 
247  std::pair<RT::PiKernel, std::mutex *>
248  getOrCreateKernel(const context &Context, const std::string &KernelName,
249  const property_list &PropList, RT::PiProgram Program);
250 
251  ProgramManager();
252  ~ProgramManager() = default;
253 
254  bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const;
255 
256 private:
257  ProgramManager(ProgramManager const &) = delete;
258  ProgramManager &operator=(ProgramManager const &) = delete;
259 
260  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId,
261  const context &Context,
262  const device &Device,
263  bool JITCompilationIsRequired = false);
264  using ProgramPtr = std::unique_ptr<remove_pointer_t<RT::PiProgram>,
265  decltype(&::piProgramRelease)>;
266  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
267  const std::string &CompileOptions,
268  const std::string &LinkOptions, const RT::PiDevice &Device,
269  uint32_t DeviceLibReqMask);
271  KernelSetId getNextKernelSetId() const;
274  KernelSetId getKernelSetId(OSModuleHandle M,
275  const std::string &KernelName) const;
277  void dumpImage(const RTDeviceBinaryImage &Img, KernelSetId KSId) const;
278 
280  void cacheKernelUsesAssertInfo(OSModuleHandle M, RTDeviceBinaryImage &Img);
281 
295 
296  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
297 
302  std::unordered_map<KernelSetId,
303  std::unique_ptr<std::vector<RTDeviceBinaryImageUPtr>>>
304  m_DeviceImages;
305 
306  using StrToKSIdMap = std::unordered_map<std::string, KernelSetId>;
310  std::unordered_map<OSModuleHandle, StrToKSIdMap> m_KernelSets;
311 
315  std::unordered_map<OSModuleHandle, KernelSetId> m_OSModuleKernelSets;
316 
321  //
322  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
323 
324  // Maps KernelIDs to device binary images. There can be more than one image
325  // in case of SPIRV + AOT.
327  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
328  m_KernelIDs2BinImage;
329 
330  // Maps device binary image to a vector of kernel ids in this image.
331  // Using shared_ptr to avoid expensive copy of the vector.
332  // The vector is initialized in addImages function and is supposed to be
333  // immutable afterwards.
335  std::unordered_map<RTDeviceBinaryImage *,
336  std::shared_ptr<std::vector<kernel_id>>>
337  m_BinImg2KernelIDs;
338 
343  std::mutex m_KernelIDsMutex;
344 
351  std::unordered_set<std::string> m_ServiceKernels;
352 
354  // from kernel bundles.
356  std::unordered_set<std::string> m_ExportedSymbols;
357 
360  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
361 
363  std::mutex m_BuiltInKernelIDsMutex;
364 
365  // Keeps track of pi_program to image correspondence. Needed for:
366  // - knowing which specialization constants are used in the program and
367  // injecting their current values before compiling the SPIR-V; the binary
368  // image object has info about all spec constants used in the module
369  // - finding kernel argument masks for kernels associated with each
370  // pi_program
371  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
372  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
373  // lifetime matches program manager's one.
374  // NOTE: keys in the map can be invalid (reference count went to zero and
375  // the underlying program disposed of), so the map can't be used in any way
376  // other than binary image lookup with known live PiProgram as the key.
377  // NOTE: access is synchronized via the MNativeProgramsMutex
378  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
379 
381  std::mutex MNativeProgramsMutex;
382 
383  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
386  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
387  m_EliminatedKernelArgMasks;
388 
390  bool m_UseSpvFile = false;
391 
392  using KernelNameWithOSModule = std::pair<std::string, OSModuleHandle>;
393  std::set<KernelNameWithOSModule> m_KernelUsesAssert;
394 
395  // Maps between device_global identifiers and associated information.
396  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
397  m_DeviceGlobals;
398  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
399 
401  std::mutex m_DeviceGlobalsMutex;
402 };
403 } // namespace detail
404 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
405 } // namespace sycl
sycl::_V1::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:698
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
sycl::_V1::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:616
sycl::_V1::detail::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex
@ cl_intel_devicelib_complex
stl.hpp
device.hpp
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:13
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2057
sycl::_V1::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:651
sycl::_V1::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:55
sycl::_V1::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:110
os_util.hpp
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:863
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf
@ cl_intel_devicelib_imf
sycl
Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:13
pi.hpp
device_global_map_entry.hpp
export.hpp
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1333
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math
@ cl_intel_devicelib_math
kernel_bundle.hpp
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_assert
@ cl_intel_devicelib_assert
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2052
Device
@ Device
Definition: usm_allocator.hpp:14
sycl::_V1::kernel_id
Objects of this class identify a kernel in some kernel_bundle-related APIs.
Definition: kernel_bundle.hpp:40
sycl::_V1::detail::pi::PiProgram
::pi_program PiProgram
Definition: pi.hpp:115
common.hpp
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:47
sycl::_V1::detail::device_impl
Definition: device_impl.hpp:35
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:719
sycl::_V1::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_imf_fp64
@ cl_intel_devicelib_imf_fp64
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_complex_fp64
sycl::_V1::detail::pi::PiKernel
::pi_kernel PiKernel
Definition: pi.hpp:116
sycl::_V1::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:383
sycl::_V1::detail::device_image_plain
Definition: kernel_bundle.hpp:72
sycl::_V1::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:76
sycl::_V1::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:60
device_binary_image.hpp
sycl::_V1::detail::program_impl
Definition: program_impl.hpp:38
util.hpp
sycl::_V1::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:56
sycl::_V1::detail::ProgramManager
Definition: program_manager.hpp:73
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_math_fp64
sycl::_V1::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
sycl::_V1::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:50
spec_constant_impl.hpp
sycl::_V1::detail::DeviceLibExt::cl_intel_devicelib_cstring
@ cl_intel_devicelib_cstring
sycl::_V1::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:54
sycl::_V1::detail::context_impl
Definition: context_impl.hpp:31
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:39
sycl::_V1::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:455