DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 #include <detail/cg.hpp>
16 #include <sycl/detail/cg_types.hpp>
17 #include <sycl/detail/common.hpp>
19 #include <sycl/detail/export.hpp>
21 #include <sycl/detail/os_util.hpp>
22 #include <sycl/detail/pi.hpp>
23 #include <sycl/detail/util.hpp>
24 #include <sycl/device.hpp>
25 #include <sycl/kernel_bundle.hpp>
26 
27 #include <cstdint>
28 #include <map>
29 #include <memory>
30 #include <set>
31 #include <unordered_map>
32 #include <unordered_set>
33 #include <vector>
34 
35 // +++ Entry points referenced by the offload wrapper object {
36 
39 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
40 
44 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
45 
46 // +++ }
47 
48 namespace sycl {
49 inline namespace _V1 {
50 class context;
51 namespace detail {
52 
53 bool doesDevSupportDeviceRequirements(const device &Dev,
54  const RTDeviceBinaryImage &BinImages);
55 std::optional<sycl::exception>
56 checkDevSupportDeviceRequirements(const device &Dev,
57  const RTDeviceBinaryImage &BinImages,
58  const NDRDescT &NDRDesc = {});
59 
60 // This value must be the same as in libdevice/device_itt.h.
61 // See sycl/doc/design/ITTAnnotations.md for more info.
62 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
63 
64 class context_impl;
65 using ContextImplPtr = std::shared_ptr<context_impl>;
66 class device_impl;
67 using DeviceImplPtr = std::shared_ptr<device_impl>;
68 class queue_impl;
69 class event_impl;
70 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
71 // If any update is made here, need to sync with DeviceLibExt definition
72 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
73 enum class DeviceLibExt : std::uint32_t {
84 };
85 
86 // Provides single loading and building OpenCL programs with unique contexts
87 // that is necessary for no interoperability cases with lambda.
89 public:
90  // Returns the single instance of the program manager for the entire
91  // process. Can only be called after staticInit is done.
92  static ProgramManager &getInstance();
93 
94  RTDeviceBinaryImage &getDeviceImage(const std::string &KernelName,
95  const context &Context,
96  const device &Device,
97  bool JITCompilationIsRequired = false);
98 
100  const std::unordered_set<RTDeviceBinaryImage *> &ImagesToVerify,
101  const context &Context, const device &Device,
102  bool JITCompilationIsRequired = false);
103 
105  const context &Context,
106  const device &Device);
125  std::pair<sycl::detail::pi::PiProgram, bool>
126  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
127  const device &Device,
128  const std::string &CompileAndLinkOptions,
129  SerializedObj SpecConsts);
140  getBuiltPIProgram(const ContextImplPtr &ContextImpl,
141  const DeviceImplPtr &DeviceImpl,
142  const std::string &KernelName, const NDRDescT &NDRDesc = {},
143  bool JITCompilationIsRequired = false);
144 
146  getBuiltPIProgram(const context &Context, const device &Device,
147  const std::string &KernelName,
148  const property_list &PropList,
149  bool JITCompilationIsRequired = false);
150 
151  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *,
153  getOrCreateKernel(const ContextImplPtr &ContextImpl,
154  const DeviceImplPtr &DeviceImpl,
155  const std::string &KernelName,
156  const NDRDescT &NDRDesc = {});
157 
160  const ContextImplPtr Context);
161 
162  void addImages(pi_device_binaries DeviceImages);
163  void debugPrintBinaryImages() const;
164  static std::string
166  const ContextImplPtr Context);
167 
168  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
169 
174  const KernelArgMask *
176  const std::string &KernelName);
177 
178  // The function returns the unique SYCL kernel identifier associated with a
179  // kernel name.
180  kernel_id getSYCLKernelID(const std::string &KernelName);
181 
182  // The function returns a vector containing all unique SYCL kernel identifiers
183  // in SYCL device images.
184  std::vector<kernel_id> getAllSYCLKernelIDs();
185 
186  // The function returns the unique SYCL kernel identifier associated with a
187  // built-in kernel name.
188  kernel_id getBuiltInKernelID(const std::string &KernelName);
189 
190  // The function inserts or initializes a device_global entry into the
191  // device_global map.
192  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
193  const char *UniqueId);
194 
195  // Returns true if any available image is compatible with the device Dev.
196  bool hasCompatibleImage(const device &Dev);
197 
198  // The function gets a device_global entry identified by the pointer to the
199  // device_global object from the device_global map.
200  DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr);
201 
202  // The function gets multiple device_global entries identified by their unique
203  // IDs from the device_global map.
204  std::vector<DeviceGlobalMapEntry *>
205  getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
206  bool ExcludeDeviceImageScopeDecorated = false);
207  // The function inserts or initializes a host_pipe entry into the
208  // host_pipe map.
209  void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId);
210 
211  // The function gets a host_pipe entry identified by the unique ID from
212  // the host_pipe map.
213  HostPipeMapEntry *getHostPipeEntry(const std::string &UniqueId);
214 
215  // The function gets a host_pipe entry identified by the pointer to the
216  // host_pipe object from the host_pipe map.
217  HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr);
218 
221  const context &Ctx, const device &Dev);
222 
223  // The function returns a vector of SYCL device images that are compiled with
224  // the required state and at least one device from the passed list of devices.
225  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
226  const context &Ctx, const std::vector<device> &Devs,
227  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
228 
229  // Brind images in the passed vector to the required state. Does it inplace
230  void
231  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
232  bundle_state TargetState);
233 
234  // The function returns a vector of SYCL device images in required state,
235  // which are compatible with at least one of the device from Devs.
236  std::vector<device_image_plain>
237  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
238  bundle_state State);
239 
240  // The function returns a vector of SYCL device images, for which Selector
241  // callable returns true, in required state, which are compatible with at
242  // least one of the device from Devs.
243  std::vector<device_image_plain>
244  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
245  const DevImgSelectorImpl &Selector,
246  bundle_state TargetState);
247 
248  // The function returns a vector of SYCL device images which represent at
249  // least one kernel from kernel ids vector in required state, which are
250  // compatible with at least one of the device from Devs.
251  std::vector<device_image_plain>
252  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
253  const std::vector<kernel_id> &KernelIDs,
254  bundle_state TargetState);
255 
256  // Produces new device image by convering input device image to the object
257  // state
258  device_image_plain compile(const device_image_plain &DeviceImage,
259  const std::vector<device> &Devs,
260  const property_list &PropList);
261 
262  // Produces set of device images by convering input device images to object
263  // the executable state
264  std::vector<device_image_plain> link(const device_image_plain &DeviceImages,
265  const std::vector<device> &Devs,
266  const property_list &PropList);
267 
268  // Produces new device image by converting input device image to the
269  // executable state
270  device_image_plain build(const device_image_plain &DeviceImage,
271  const std::vector<device> &Devs,
272  const property_list &PropList);
273 
274  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
275  getOrCreateKernel(const context &Context, const std::string &KernelName,
276  const property_list &PropList,
278 
279  ProgramManager();
280  ~ProgramManager() = default;
281 
282  bool kernelUsesAssert(const std::string &KernelName) const;
283 
284  bool kernelUsesAsan() const { return m_AsanFoundInImage; }
285 
286  std::set<RTDeviceBinaryImage *>
287  getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
288 
289 private:
290  ProgramManager(ProgramManager const &) = delete;
291  ProgramManager &operator=(ProgramManager const &) = delete;
292 
293  using ProgramPtr =
294  std::unique_ptr<remove_pointer_t<sycl::detail::pi::PiProgram>,
295  decltype(&::piProgramRelease)>;
296  ProgramPtr
297  build(ProgramPtr Program, const ContextImplPtr Context,
298  const std::string &CompileOptions, const std::string &LinkOptions,
299  const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask,
300  const std::vector<sycl::detail::pi::PiProgram> &ProgramsToLink);
302  void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
303 
305  void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img);
306 
307  std::set<RTDeviceBinaryImage *>
308  collectDependentDeviceImagesForVirtualFunctions(
309  const RTDeviceBinaryImage &Img, device Dev);
310 
313  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
314 
319  //
320  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
321 
322  // Maps KernelIDs to device binary images. There can be more than one image
323  // in case of SPIRV + AOT.
324  // Using shared_ptr to avoid expensive copy of the vector.
326  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
327  m_KernelIDs2BinImage;
328 
329  // Maps device binary image to a vector of kernel ids in this image.
330  // Using shared_ptr to avoid expensive copy of the vector.
331  // The vector is initialized in addImages function and is supposed to be
332  // immutable afterwards.
334  std::unordered_map<RTDeviceBinaryImage *,
335  std::shared_ptr<std::vector<kernel_id>>>
336  m_BinImg2KernelIDs;
337 
342  std::mutex m_KernelIDsMutex;
343 
350  std::unordered_multimap<std::string, RTDeviceBinaryImage *> m_ServiceKernels;
351 
353  // from kernel bundles.
355  std::unordered_set<std::string> m_ExportedSymbols;
356 
359  std::unordered_set<RTDeviceBinaryImageUPtr> m_DeviceImages;
360 
363  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
364 
367  std::unordered_map<std::string, std::set<RTDeviceBinaryImage *>>
368  m_VFSet2BinImage;
369 
371  std::mutex m_BuiltInKernelIDsMutex;
372 
373  // Keeps track of pi_program to image correspondence. Needed for:
374  // - knowing which specialization constants are used in the program and
375  // injecting their current values before compiling the SPIR-V; the binary
376  // image object has info about all spec constants used in the module
377  // - finding kernel argument masks for kernels associated with each
378  // pi_program
379  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
380  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
381  // lifetime matches program manager's one.
382  // NOTE: keys in the map can be invalid (reference count went to zero and
383  // the underlying program disposed of), so the map can't be used in any way
384  // other than binary image lookup with known live PiProgram as the key.
385  // NOTE: access is synchronized via the MNativeProgramsMutex
386  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
387 
389  std::mutex MNativeProgramsMutex;
390 
391  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
394  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
395  m_EliminatedKernelArgMasks;
396 
398  bool m_UseSpvFile = false;
399  RTDeviceBinaryImageUPtr m_SpvFileImage;
400 
401  std::set<std::string> m_KernelUsesAssert;
402 
403  // True iff there is a device image compiled with AddressSanitizer
404  bool m_AsanFoundInImage;
405 
406  // Maps between device_global identifiers and associated information.
407  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
408  m_DeviceGlobals;
409  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
410 
412  std::mutex m_DeviceGlobalsMutex;
413 
414  // Maps between host_pipe identifiers and associated information.
415  std::unordered_map<std::string, std::unique_ptr<HostPipeMapEntry>>
416  m_HostPipes;
417  std::unordered_map<const void *, HostPipeMapEntry *> m_Ptr2HostPipe;
418 
420  std::mutex m_HostPipesMutex;
421 };
422 } // namespace detail
423 } // namespace _V1
424 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
void addImages(pi_device_binaries DeviceImages)
std::vector< device_image_plain > getSYCLDeviceImages(const context &Ctx, const std::vector< device > &Devs, bundle_state State)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
bool kernelUsesAssert(const std::string &KernelName) const
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
void bringSYCLDeviceImagesToState(std::vector< device_image_plain > &DeviceImages, bundle_state TargetState)
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
device_image_plain compile(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
sycl::detail::pi::PiProgram getBuiltPIProgram(const context &Context, const device &Device, const std::string &KernelName, const property_list &PropList, bool JITCompilationIsRequired=false)
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
bool hasCompatibleImage(const device &Dev)
std::vector< device_image_plain > link(const device_image_plain &DeviceImages, const std::vector< device > &Devs, const property_list &PropList)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
::pi_kernel PiKernel
Definition: pi.hpp:117
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::vector< bool > KernelArgMask
static constexpr uint32_t ITTSpecConstId
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
std::shared_ptr< device_impl > DeviceImplPtr
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
std::vector< unsigned char > SerializedObj
Definition: util.hpp:69
Definition: access.hpp:18
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
C++ wrapper of extern "C" PI interfaces.
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:1201