DPC++ Runtime
Runtime libraries for oneAPI Data Parallel C++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
14 #include <CL/sycl/detail/pi.hpp>
15 #include <CL/sycl/detail/util.hpp>
16 #include <CL/sycl/device.hpp>
18 #include <CL/sycl/stl.hpp>
20 
21 #include <cstdint>
22 #include <map>
23 #include <memory>
24 #include <set>
25 #include <unordered_map>
26 #include <unordered_set>
27 #include <vector>
28 
29 // +++ Entry points referenced by the offload wrapper object {
30 
33 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
34 
38 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
39 
40 // +++ }
41 
43 namespace sycl {
44 class context;
45 namespace detail {
46 
47 // This value must be the same as in libdevice/device_itt.h.
48 // See sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst for more info.
49 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
50 
51 class context_impl;
52 using ContextImplPtr = std::shared_ptr<context_impl>;
53 class device_impl;
54 using DeviceImplPtr = std::shared_ptr<device_impl>;
55 class program_impl;
56 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
57 // If any update is made here, need to sync with DeviceLibExt definition
58 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
59 enum class DeviceLibExt : std::uint32_t {
66 };
67 
68 // Provides single loading and building of OpenCL programs with unique
69 // contexts, which is necessary for non-interoperability cases with lambdas.
71 public:
72  // TODO use a custom dynamic bitset instead to make initialization simpler.
73  using KernelArgMask = std::vector<bool>;
74 
75  // Returns the single instance of the program manager for the entire
76  // process. Can only be called after staticInit is done.
77  static ProgramManager &getInstance();
78  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M,
79  const std::string &KernelName,
80  const context &Context,
81  const device &Device,
82  bool JITCompilationIsRequired = false);
83  RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img,
84  const context &Context, const device &Device);
103  std::pair<RT::PiProgram, bool>
104  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
105  const device &Device,
106  const std::string &CompileAndLinkOptions,
107  SerializedObj SpecConsts);
121  RT::PiProgram getBuiltPIProgram(OSModuleHandle M,
122  const ContextImplPtr &ContextImpl,
123  const DeviceImplPtr &DeviceImpl,
124  const std::string &KernelName,
125  const program_impl *Prg = nullptr,
126  bool JITCompilationIsRequired = false);
127 
128  RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context,
129  const device &Device,
130  const std::string &KernelName,
131  const property_list &PropList,
132  bool JITCompilationIsRequired = false);
133 
134  std::tuple<RT::PiKernel, std::mutex *, RT::PiProgram>
135  getOrCreateKernel(OSModuleHandle M, const ContextImplPtr &ContextImpl,
136  const DeviceImplPtr &DeviceImpl,
137  const std::string &KernelName, const program_impl *Prg);
138 
139  RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel,
140  const ContextImplPtr Context);
141 
142  void addImages(pi_device_binaries DeviceImages);
143  void debugPrintBinaryImages() const;
144  static std::string getProgramBuildLog(const RT::PiProgram &Program,
145  const ContextImplPtr Context);
146 
158  void flushSpecConstants(const program_impl &Prg,
159  pi::PiProgram NativePrg = nullptr,
160  const RTDeviceBinaryImage *Img = nullptr);
161  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
162 
169  KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
170  pi::PiProgram NativePrg,
171  const std::string &KernelName);
172 
173  // The function returns the unique SYCL kernel identifier associated with a
174  // kernel name.
175  kernel_id getSYCLKernelID(const std::string &KernelName);
176 
177  // The function returns a vector containing all unique SYCL kernel identifiers
178  // in SYCL device images.
179  std::vector<kernel_id> getAllSYCLKernelIDs();
180 
181  // The function returns the unique SYCL kernel identifier associated with a
182  // built-in kernel name.
183  kernel_id getBuiltInKernelID(const std::string &KernelName);
184 
185  // The function returns a vector of SYCL device images that are compiled with
186  // the required state and at least one device from the passed list of devices.
187  std::vector<device_image_plain>
188  getSYCLDeviceImagesWithCompatibleState(const context &Ctx,
189  const std::vector<device> &Devs,
190  bundle_state TargetState);
191 
192  // Bring images in the passed vector to the required state. Does it in place.
193  void
194  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
195  bundle_state TargetState);
196 
197  // The function returns a vector of SYCL device images in required state,
198  // which are compatible with at least one of the device from Devs.
199  std::vector<device_image_plain>
200  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
201  bundle_state State);
202 
203  // The function returns a vector of SYCL device images, for which Selector
204  // callable returns true, in required state, which are compatible with at
205  // least one of the device from Devs.
206  std::vector<device_image_plain>
207  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
208  const DevImgSelectorImpl &Selector,
209  bundle_state TargetState);
210 
211  // The function returns a vector of SYCL device images which represent at
212  // least one kernel from kernel ids vector in required state, which are
213  // compatible with at least one of the device from Devs.
214  std::vector<device_image_plain>
215  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
216  const std::vector<kernel_id> &KernelIDs,
217  bundle_state TargetState);
218 
219  // Produces new device image by converting input device image to the object
220  // state
221  device_image_plain compile(const device_image_plain &DeviceImage,
222  const std::vector<device> &Devs,
223  const property_list &PropList);
224 
225  // Produces set of device images by converting input device images from the
226  // object state to the executable state
227  std::vector<device_image_plain>
228  link(const std::vector<device_image_plain> &DeviceImages,
229  const std::vector<device> &Devs, const property_list &PropList);
230 
231  // Produces new device image by converting input device image to the
232  // executable state
233  device_image_plain build(const device_image_plain &DeviceImage,
234  const std::vector<device> &Devs,
235  const property_list &PropList);
236 
237  std::pair<RT::PiKernel, std::mutex *>
238  getOrCreateKernel(const context &Context, const std::string &KernelName,
239  const property_list &PropList, RT::PiProgram Program);
240 
241  ProgramManager();
242  ~ProgramManager() = default;
243 
244  bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const;
245 
246 private:
247  ProgramManager(ProgramManager const &) = delete;
248  ProgramManager &operator=(ProgramManager const &) = delete;
249 
250  RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId,
251  const context &Context,
252  const device &Device,
253  bool JITCompilationIsRequired = false);
254  using ProgramPtr = std::unique_ptr<remove_pointer_t<RT::PiProgram>,
255  decltype(&::piProgramRelease)>;
256  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
257  const std::string &CompileOptions,
258  const std::string &LinkOptions, const RT::PiDevice &Device,
259  std::map<std::pair<DeviceLibExt, RT::PiDevice>,
260  RT::PiProgram> &CachedLibPrograms,
261  uint32_t DeviceLibReqMask);
263  KernelSetId getNextKernelSetId() const;
266  KernelSetId getKernelSetId(OSModuleHandle M,
267  const std::string &KernelName) const;
269  void dumpImage(const RTDeviceBinaryImage &Img, KernelSetId KSId) const;
270 
272  void cacheKernelUsesAssertInfo(OSModuleHandle M, RTDeviceBinaryImage &Img);
273 
287 
288  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
289 
294  std::unordered_map<KernelSetId,
295  std::unique_ptr<std::vector<RTDeviceBinaryImageUPtr>>>
296  m_DeviceImages;
297 
298  using StrToKSIdMap = std::unordered_map<std::string, KernelSetId>;
302  std::unordered_map<OSModuleHandle, StrToKSIdMap> m_KernelSets;
303 
307  std::unordered_map<OSModuleHandle, KernelSetId> m_OSModuleKernelSets;
308 
313  std::unordered_map<std::string, kernel_id> m_KernelIDs;
314 
319  std::mutex m_KernelIDsMutex;
320 
327  std::unordered_set<std::string> m_ServiceKernels;
328 
330  // from kernel bundles.
332  std::unordered_set<std::string> m_ExportedSymbols;
333 
336  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
337 
339  std::mutex m_BuiltInKernelIDsMutex;
340 
341  // Keeps track of pi_program to image correspondence. Needed for:
342  // - knowing which specialization constants are used in the program and
343  // injecting their current values before compiling the SPIR-V; the binary
344  // image object has info about all spec constants used in the module
345  // - finding kernel argument masks for kernels associated with each
346  // pi_program
347  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
348  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
349  // lifetime matches program manager's one.
350  // NOTE: keys in the map can be invalid (reference count went to zero and
351  // the underlying program disposed of), so the map can't be used in any way
352  // other than binary image lookup with known live PiProgram as the key.
353  // NOTE: access is synchronized via the MNativeProgramsMutex
354  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
355 
357  std::mutex MNativeProgramsMutex;
358 
359  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
362  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
363  m_EliminatedKernelArgMasks;
364 
366  bool m_UseSpvFile = false;
367 
368  using KernelNameWithOSModule = std::pair<std::string, OSModuleHandle>;
369  std::set<KernelNameWithOSModule> m_KernelUsesAssert;
370 };
371 } // namespace detail
372 } // namespace sycl
373 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::SerializedObj
std::vector< unsigned char > SerializedObj
Definition: util.hpp:56
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:31
cl::sycl::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:20
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:694
cl::sycl::detail::device_impl
Definition: device_impl.hpp:33
cl::sycl::link
kernel_bundle< bundle_state::executable > link(const std::vector< kernel_bundle< bundle_state::object >> &ObjectBundles, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:647
stl.hpp
device.hpp
__sycl_unregister_lib
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
Definition: program_manager.cpp:1884
cl::sycl::detail::device_image_plain
Definition: kernel_bundle.hpp:71
cl::sycl::detail::DeviceLibExt
DeviceLibExt
Definition: program_manager.hpp:59
cl::sycl::detail::program_impl
Definition: program_impl.hpp:37
os_util.hpp
pi_device_binaries_struct
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:840
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
util.hpp
cl::sycl::detail::KernelSetId
size_t KernelSetId
Definition: common.hpp:340
pi.hpp
cl::sycl::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:578
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_assert
@ cl_intel_devicelib_assert
export.hpp
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1024
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:35
kernel_bundle.hpp
cl::sycl::compile
kernel_bundle< bundle_state::object > compile(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:612
cl::sycl::kernel_id
Objects of the class identify kernel in some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:39
__sycl_register_lib
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
Definition: program_manager.cpp:1879
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
cl::sycl::detail::context_impl
Definition: context_impl.hpp:31
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:523
cl::sycl::detail::ProgramManager
Definition: program_manager.hpp:70
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_cstring
@ cl_intel_devicelib_cstring
cl::sycl::detail::OSModuleHandle
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_math
@ cl_intel_devicelib_math
cl::sycl::detail::ProgramManager::KernelArgMask
std::vector< bool > KernelArgMask
Definition: program_manager.hpp:73
cl::sycl::detail::DevImgSelectorImpl
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
Definition: kernel_bundle.hpp:467
device_binary_image.hpp
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:54
common.hpp
cl::sycl::info::context
context
Definition: info_desc.hpp:41
spec_constant_impl.hpp
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_complex
@ cl_intel_devicelib_complex
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_math_fp64
@ cl_intel_devicelib_math_fp64
cl::sycl::detail::ITTSpecConstId
static constexpr uint32_t ITTSpecConstId
Definition: program_manager.hpp:49
cl::sycl::detail::DeviceLibExt::cl_intel_devicelib_complex_fp64
@ cl_intel_devicelib_complex_fp64
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:71
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12