DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
14 #include <CL/sycl/detail/pi.hpp>
15 #include <CL/sycl/detail/util.hpp>
16 #include <CL/sycl/device.hpp>
18 #include <CL/sycl/stl.hpp>
21 
22 #include <cstdint>
23 #include <map>
24 #include <memory>
25 #include <set>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <vector>
29 
30 // +++ Entry points referenced by the offload wrapper object {
31 
34 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
35 
39 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
40 
41 // +++ }
42 
44 namespace sycl {
45 class context;
46 namespace detail {
47 
48 // This value must be the same as in libdevice/device_itt.h.
49 // See sycl/doc/design/ITTAnnotations.md for more info.
50 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
51 
52 class context_impl;
53 using ContextImplPtr = std::shared_ptr<context_impl>;
54 class device_impl;
55 using DeviceImplPtr = std::shared_ptr<device_impl>;
56 class program_impl;
57 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
58 // If any update is made here, need to sync with DeviceLibExt definition
59 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
60 enum class DeviceLibExt : std::uint32_t {
67 };
68 
69 // Provides single loading and building OpenCL programs with unique contexts
70 // that is necessary for no interoperability cases with lambda.
72 public:
73  // TODO use a custom dynamic bitset instead to make initialization simpler.
74  using KernelArgMask = std::vector<bool>;
75 
76  // Returns the single instance of the program manager for the entire
77  // process. Can only be called after staticInit is done.
78  static ProgramManager &getInstance();
79  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M,
80  const std::string &KernelName,
81  const context &Context,
82  const device &Device,
83  bool JITCompilationIsRequired = false);
84  RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img,
85  const context &Context, const device &Device);
104  std::pair<RT::PiProgram, bool>
105  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
106  const device &Device,
107  const std::string &CompileAndLinkOptions,
108  SerializedObj SpecConsts);
122  RT::PiProgram getBuiltPIProgram(OSModuleHandle M,
123  const ContextImplPtr &ContextImpl,
124  const DeviceImplPtr &DeviceImpl,
125  const std::string &KernelName,
126  const program_impl *Prg = nullptr,
127  bool JITCompilationIsRequired = false);
128 
129  RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context,
130  const device &Device,
131  const std::string &KernelName,
132  const property_list &PropList,
133  bool JITCompilationIsRequired = false);
134 
135  std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
136  getOrCreateKernel(OSModuleHandle M, const ContextImplPtr &ContextImpl,
137  const DeviceImplPtr &DeviceImpl,
138  const std::string &KernelName, const program_impl *Prg);
139 
140  RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel,
141  const ContextImplPtr Context);
142 
143  void addImages(pi_device_binaries DeviceImages);
144  void debugPrintBinaryImages() const;
145  static std::string getProgramBuildLog(const RT::PiProgram &Program,
146  const ContextImplPtr Context);
147 
159  void flushSpecConstants(const program_impl &Prg,
160  pi::PiProgram NativePrg = nullptr,
161  const RTDeviceBinaryImage *Img = nullptr);
162  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
163 
170  KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
171  pi::PiProgram NativePrg,
172  const std::string &KernelName);
173 
174  // The function returns the unique SYCL kernel identifier associated with a
175  // kernel name.
176  kernel_id getSYCLKernelID(const std::string &KernelName);
177 
178  // The function returns a vector containing all unique SYCL kernel identifiers
179  // in SYCL device images.
180  std::vector<kernel_id> getAllSYCLKernelIDs();
181 
182  // The function returns the unique SYCL kernel identifier associated with a
183  // built-in kernel name.
184  kernel_id getBuiltInKernelID(const std::string &KernelName);
185 
186  // The function inserts or initializes a device_global entry into the
187  // device_global map.
188  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
189  const char *UniqueId);
190 
191  // The function returns a vector of SYCL device images that are compiled with
192  // the required state and at least one device from the passed list of devices.
193  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
194  const context &Ctx, const std::vector<device> &Devs,
195  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
196 
197  // Brind images in the passed vector to the required state. Does it inplace
198  void
199  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
200  bundle_state TargetState);
201 
202  // The function returns a vector of SYCL device images in required state,
203  // which are compatible with at least one of the device from Devs.
204  std::vector<device_image_plain>
205  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
206  bundle_state State);
207 
208  // The function returns a vector of SYCL device images, for which Selector
209  // callable returns true, in required state, which are compatible with at
210  // least one of the device from Devs.
211  std::vector<device_image_plain>
212  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
213  const DevImgSelectorImpl &Selector,
214  bundle_state TargetState);
215 
216  // The function returns a vector of SYCL device images which represent at
217  // least one kernel from kernel ids vector in required state, which are
218  // compatible with at least one of the device from Devs.
219  std::vector<device_image_plain>
220  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
221  const std::vector<kernel_id> &KernelIDs,
222  bundle_state TargetState);
223 
224  // Produces new device image by convering input device image to the object
225  // state
226  device_image_plain compile(const device_image_plain &DeviceImage,
227  const std::vector<device> &Devs,
228  const property_list &PropList);
229 
230  // Produces set of device images by convering input device images to object
231  // the executable state
232  std::vector<device_image_plain>
233  link(const std::vector<device_image_plain> &DeviceImages,
234  const std::vector<device> &Devs, const property_list &PropList);
235 
236  // Produces new device image by converting input device image to the
237  // executable state
238  device_image_plain build(const device_image_plain &DeviceImage,
239  const std::vector<device> &Devs,
240  const property_list &PropList);
241 
242  std::pair<RT::PiKernel, std::mutex *>
243  getOrCreateKernel(const context &Context, const std::string &KernelName,
244  const property_list &PropList, RT::PiProgram Program);
245 
246  ProgramManager();
247  ~ProgramManager() = default;
248 
249  bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const;
250 
251 private:
252  ProgramManager(ProgramManager const &) = delete;
253  ProgramManager &operator=(ProgramManager const &) = delete;
254 
255  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId,
256  const context &Context,
257  const device &Device,
258  bool JITCompilationIsRequired = false);
259  using ProgramPtr = std::unique_ptr<remove_pointer_t<RT::PiProgram>,
260  decltype(&::piProgramRelease)>;
261  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
262  const std::string &CompileOptions,
263  const std::string &LinkOptions, const RT::PiDevice &Device,
264  uint32_t DeviceLibReqMask);
266  KernelSetId getNextKernelSetId() const;
269  KernelSetId getKernelSetId(OSModuleHandle M,
270  const std::string &KernelName) const;
272  void dumpImage(const RTDeviceBinaryImage &Img, KernelSetId KSId) const;
273 
275  void cacheKernelUsesAssertInfo(OSModuleHandle M, RTDeviceBinaryImage &Img);
276 
290 
291  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
292 
297  std::unordered_map<KernelSetId,
298  std::unique_ptr<std::vector<RTDeviceBinaryImageUPtr>>>
299  m_DeviceImages;
300 
301  using StrToKSIdMap = std::unordered_map<std::string, KernelSetId>;
305  std::unordered_map<OSModuleHandle, StrToKSIdMap> m_KernelSets;
306 
310  std::unordered_map<OSModuleHandle, KernelSetId> m_OSModuleKernelSets;
311 
316  //
317  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
318 
319  // Maps KernelIDs to device binary images. There can be more than one image
320  // in case of SPIRV + AOT.
322  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
323  m_KernelIDs2BinImage;
324 
325  // Maps device binary image to a vector of kernel ids in this image.
326  // Using shared_ptr to avoid expensive copy of the vector.
327  // The vector is initialized in addImages function and is supposed to be
328  // immutable afterwards.
330  std::unordered_map<RTDeviceBinaryImage *,
331  std::shared_ptr<std::vector<kernel_id>>>
332  m_BinImg2KernelIDs;
333 
338  std::mutex m_KernelIDsMutex;
339 
346  std::unordered_set<std::string> m_ServiceKernels;
347 
349  // from kernel bundles.
351  std::unordered_set<std::string> m_ExportedSymbols;
352 
355  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
356 
358  std::mutex m_BuiltInKernelIDsMutex;
359 
360  // Keeps track of pi_program to image correspondence. Needed for:
361  // - knowing which specialization constants are used in the program and
362  // injecting their current values before compiling the SPIR-V; the binary
363  // image object has info about all spec constants used in the module
364  // - finding kernel argument masks for kernels associated with each
365  // pi_program
366  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
367  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
368  // lifetime matches program manager's one.
369  // NOTE: keys in the map can be invalid (reference count went to zero and
370  // the underlying program disposed of), so the map can't be used in any way
371  // other than binary image lookup with known live PiProgram as the key.
372  // NOTE: access is synchronized via the MNativeProgramsMutex
373  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
374 
376  std::mutex MNativeProgramsMutex;
377 
378  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
381  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
382  m_EliminatedKernelArgMasks;
383 
385  bool m_UseSpvFile = false;
386 
387  using KernelNameWithOSModule = std::pair<std::string, OSModuleHandle>;
388  std::set<KernelNameWithOSModule> m_KernelUsesAssert;
389 
390  // Maps between device_global identifiers and associated information.
391  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
392  m_DeviceGlobals;
393  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
394 
396  std::mutex m_DeviceGlobalsMutex;
397 };
398 } // namespace detail
399 } // namespace sycl
400 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:56
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:32
cl::sycl::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:20
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:714
cl::sycl::detail::device_impl
Definition: device_impl.hpp:34
cl::sycl::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:667
stl.hpp
device.hpp
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:2012
cl::sycl::detail::device_image_plain
Definition: kernel_bundle.hpp:71
cl::sycl::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:60
cl::sycl::detail::program_impl
Definition: program_impl.hpp:37
os_util.hpp
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:895
sycl
Definition: invoke_simd.hpp:68
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
util.hpp
cl::sycl::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:340
pi.hpp
cl::sycl::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
device_global_map_entry.hpp
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:624
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_assert
@ cl_intel_devicelib_assert
export.hpp
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1338
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:35
kernel_bundle.hpp
cl::sycl::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:632
cl::sycl::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:39
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:2007
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::detail::context_impl
Definition: context_impl.hpp:31
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:569
cl::sycl::detail::ProgramManager
Definition: program_manager.hpp:71
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_cstring
@ cl_intel_devicelib_cstring
cl::sycl::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_math
@ cl_intel_devicelib_math
cl::sycl::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:74
cl::sycl::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:467
device_binary_image.hpp
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:55
common.hpp
cl::sycl::info::context
context
Definition: info_desc.hpp:42
spec_constant_impl.hpp
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_complex
@ cl_intel_devicelib_complex
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_math_fp64
cl::sycl::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:50
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_complex_fp64
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:73
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12