DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
15 #include <sycl/detail/cg_types.hpp>
16 #include <sycl/detail/common.hpp>
18 #include <sycl/detail/export.hpp>
20 #include <sycl/detail/os_util.hpp>
21 #include <sycl/detail/pi.hpp>
22 #include <sycl/detail/util.hpp>
23 #include <sycl/device.hpp>
24 #include <sycl/kernel_bundle.hpp>
25 #include <sycl/stl.hpp>
26 
27 #include <cstdint>
28 #include <map>
29 #include <memory>
30 #include <set>
31 #include <unordered_map>
32 #include <unordered_set>
33 #include <vector>
34 
35 // +++ Entry points referenced by the offload wrapper object {
36 
39 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
40 
44 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
45 
46 // +++ }
47 
48 namespace sycl {
49 inline namespace _V1 {
50 class context;
51 namespace detail {
52 
53 bool doesDevSupportDeviceRequirements(const device &Dev,
54  const RTDeviceBinaryImage &BinImages);
55 std::optional<sycl::exception>
56 checkDevSupportDeviceRequirements(const device &Dev,
57  const RTDeviceBinaryImage &BinImages,
58  const NDRDescT &NDRDesc = {});
59 
60 // This value must be the same as in libdevice/device_itt.h.
61 // See sycl/doc/design/ITTAnnotations.md for more info.
62 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
63 
64 class context_impl;
65 using ContextImplPtr = std::shared_ptr<context_impl>;
66 class device_impl;
67 using DeviceImplPtr = std::shared_ptr<device_impl>;
68 class program_impl;
69 class queue_impl;
70 class event_impl;
71 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
72 // If any update is made here, need to sync with DeviceLibExt definition
73 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
74 enum class DeviceLibExt : std::uint32_t {
85 };
86 
87 // Provides single loading and building OpenCL programs with unique contexts
88 // that is necessary for no interoperability cases with lambda.
90 public:
91  // Returns the single instance of the program manager for the entire
92  // process. Can only be called after staticInit is done.
93  static ProgramManager &getInstance();
94 
95  RTDeviceBinaryImage &getDeviceImage(const std::string &KernelName,
96  const context &Context,
97  const device &Device,
98  bool JITCompilationIsRequired = false);
99 
101  const std::unordered_set<RTDeviceBinaryImage *> &ImagesToVerify,
102  const context &Context, const device &Device,
103  bool JITCompilationIsRequired = false);
104 
106  const context &Context,
107  const device &Device);
126  std::pair<sycl::detail::pi::PiProgram, bool>
127  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
128  const device &Device,
129  const std::string &CompileAndLinkOptions,
130  SerializedObj SpecConsts);
141  getBuiltPIProgram(const ContextImplPtr &ContextImpl,
142  const DeviceImplPtr &DeviceImpl,
143  const std::string &KernelName, const NDRDescT &NDRDesc = {},
144  bool JITCompilationIsRequired = false);
145 
147  getBuiltPIProgram(const context &Context, const device &Device,
148  const std::string &KernelName,
149  const property_list &PropList,
150  bool JITCompilationIsRequired = false);
151 
152  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *,
154  getOrCreateKernel(const ContextImplPtr &ContextImpl,
155  const DeviceImplPtr &DeviceImpl,
156  const std::string &KernelName,
157  const NDRDescT &NDRDesc = {});
158 
161  const ContextImplPtr Context);
162 
163  void addImages(pi_device_binaries DeviceImages);
164  void debugPrintBinaryImages() const;
165  static std::string
167  const ContextImplPtr Context);
168 
180  void flushSpecConstants(const program_impl &Prg,
181  pi::PiProgram NativePrg = nullptr,
182  const RTDeviceBinaryImage *Img = nullptr);
183  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
184 
189  const KernelArgMask *
191  const std::string &KernelName);
192 
193  // The function returns the unique SYCL kernel identifier associated with a
194  // kernel name.
195  kernel_id getSYCLKernelID(const std::string &KernelName);
196 
197  // The function returns a vector containing all unique SYCL kernel identifiers
198  // in SYCL device images.
199  std::vector<kernel_id> getAllSYCLKernelIDs();
200 
201  // The function returns the unique SYCL kernel identifier associated with a
202  // built-in kernel name.
203  kernel_id getBuiltInKernelID(const std::string &KernelName);
204 
205  // The function inserts or initializes a device_global entry into the
206  // device_global map.
207  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
208  const char *UniqueId);
209 
210  // Returns true if any available image is compatible with the device Dev.
211  bool hasCompatibleImage(const device &Dev);
212 
213  // The function gets a device_global entry identified by the pointer to the
214  // device_global object from the device_global map.
215  DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr);
216 
217  // The function gets multiple device_global entries identified by their unique
218  // IDs from the device_global map.
219  std::vector<DeviceGlobalMapEntry *>
220  getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
221  bool ExcludeDeviceImageScopeDecorated = false);
222  // The function inserts or initializes a host_pipe entry into the
223  // host_pipe map.
224  void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId);
225 
226  // The function gets a host_pipe entry identified by the unique ID from
227  // the host_pipe map.
228  HostPipeMapEntry *getHostPipeEntry(const std::string &UniqueId);
229 
230  // The function gets a host_pipe entry identified by the pointer to the
231  // host_pipe object from the host_pipe map.
232  HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr);
233 
236  const context &Ctx, const device &Dev);
237 
238  // The function returns a vector of SYCL device images that are compiled with
239  // the required state and at least one device from the passed list of devices.
240  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
241  const context &Ctx, const std::vector<device> &Devs,
242  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
243 
244  // Brind images in the passed vector to the required state. Does it inplace
245  void
246  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
247  bundle_state TargetState);
248 
249  // The function returns a vector of SYCL device images in required state,
250  // which are compatible with at least one of the device from Devs.
251  std::vector<device_image_plain>
252  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
253  bundle_state State);
254 
255  // The function returns a vector of SYCL device images, for which Selector
256  // callable returns true, in required state, which are compatible with at
257  // least one of the device from Devs.
258  std::vector<device_image_plain>
259  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
260  const DevImgSelectorImpl &Selector,
261  bundle_state TargetState);
262 
263  // The function returns a vector of SYCL device images which represent at
264  // least one kernel from kernel ids vector in required state, which are
265  // compatible with at least one of the device from Devs.
266  std::vector<device_image_plain>
267  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
268  const std::vector<kernel_id> &KernelIDs,
269  bundle_state TargetState);
270 
271  // Produces new device image by convering input device image to the object
272  // state
273  device_image_plain compile(const device_image_plain &DeviceImage,
274  const std::vector<device> &Devs,
275  const property_list &PropList);
276 
277  // Produces set of device images by convering input device images to object
278  // the executable state
279  std::vector<device_image_plain> link(const device_image_plain &DeviceImages,
280  const std::vector<device> &Devs,
281  const property_list &PropList);
282 
283  // Produces new device image by converting input device image to the
284  // executable state
285  device_image_plain build(const device_image_plain &DeviceImage,
286  const std::vector<device> &Devs,
287  const property_list &PropList);
288 
289  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
290  getOrCreateKernel(const context &Context, const std::string &KernelName,
291  const property_list &PropList,
293 
294  ProgramManager();
295  ~ProgramManager() = default;
296 
297  bool kernelUsesAssert(const std::string &KernelName) const;
298 
299  bool kernelUsesAsan() const { return m_AsanFoundInImage; }
300 
301  std::set<RTDeviceBinaryImage *>
302  getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
303 
304 private:
305  ProgramManager(ProgramManager const &) = delete;
306  ProgramManager &operator=(ProgramManager const &) = delete;
307 
308  using ProgramPtr =
309  std::unique_ptr<remove_pointer_t<sycl::detail::pi::PiProgram>,
310  decltype(&::piProgramRelease)>;
311  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
312  const std::string &CompileOptions,
313  const std::string &LinkOptions,
314  const sycl::detail::pi::PiDevice &Device,
315  uint32_t DeviceLibReqMask);
317  void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
318 
320  void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img);
321 
324  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
325 
330  //
331  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
332 
333  // Maps KernelIDs to device binary images. There can be more than one image
334  // in case of SPIRV + AOT.
335  // Using shared_ptr to avoid expensive copy of the vector.
337  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
338  m_KernelIDs2BinImage;
339 
340  // Maps device binary image to a vector of kernel ids in this image.
341  // Using shared_ptr to avoid expensive copy of the vector.
342  // The vector is initialized in addImages function and is supposed to be
343  // immutable afterwards.
345  std::unordered_map<RTDeviceBinaryImage *,
346  std::shared_ptr<std::vector<kernel_id>>>
347  m_BinImg2KernelIDs;
348 
353  std::mutex m_KernelIDsMutex;
354 
361  std::unordered_multimap<std::string, RTDeviceBinaryImage *> m_ServiceKernels;
362 
364  // from kernel bundles.
366  std::unordered_set<std::string> m_ExportedSymbols;
367 
370  std::unordered_set<RTDeviceBinaryImageUPtr> m_DeviceImages;
371 
374  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
375 
377  std::mutex m_BuiltInKernelIDsMutex;
378 
379  // Keeps track of pi_program to image correspondence. Needed for:
380  // - knowing which specialization constants are used in the program and
381  // injecting their current values before compiling the SPIR-V; the binary
382  // image object has info about all spec constants used in the module
383  // - finding kernel argument masks for kernels associated with each
384  // pi_program
385  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
386  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
387  // lifetime matches program manager's one.
388  // NOTE: keys in the map can be invalid (reference count went to zero and
389  // the underlying program disposed of), so the map can't be used in any way
390  // other than binary image lookup with known live PiProgram as the key.
391  // NOTE: access is synchronized via the MNativeProgramsMutex
392  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
393 
395  std::mutex MNativeProgramsMutex;
396 
397  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
400  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
401  m_EliminatedKernelArgMasks;
402 
404  bool m_UseSpvFile = false;
405  RTDeviceBinaryImageUPtr m_SpvFileImage;
406 
407  std::set<std::string> m_KernelUsesAssert;
408 
409  // True iff there is a device image compiled with AddressSanitizer
410  bool m_AsanFoundInImage;
411 
412  // Maps between device_global identifiers and associated information.
413  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
414  m_DeviceGlobals;
415  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
416 
418  std::mutex m_DeviceGlobalsMutex;
419 
420  // Maps between host_pipe identifiers and associated information.
421  std::unordered_map<std::string, std::unique_ptr<HostPipeMapEntry>>
422  m_HostPipes;
423  std::unordered_map<const void *, HostPipeMapEntry *> m_Ptr2HostPipe;
424 
426  std::mutex m_HostPipesMutex;
427 };
428 } // namespace detail
429 } // namespace _V1
430 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
void addImages(pi_device_binaries DeviceImages)
std::vector< device_image_plain > getSYCLDeviceImages(const context &Ctx, const std::vector< device > &Devs, bundle_state State)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
bool kernelUsesAssert(const std::string &KernelName) const
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
void bringSYCLDeviceImagesToState(std::vector< device_image_plain > &DeviceImages, bundle_state TargetState)
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg=nullptr, const RTDeviceBinaryImage *Img=nullptr)
Resolves given program to a device binary image and requests the program to flush constants the image...
device_image_plain compile(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
sycl::detail::pi::PiProgram getBuiltPIProgram(const context &Context, const device &Device, const std::string &KernelName, const property_list &PropList, bool JITCompilationIsRequired=false)
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
bool hasCompatibleImage(const device &Dev)
std::vector< device_image_plain > link(const device_image_plain &DeviceImages, const std::vector< device > &Devs, const property_list &PropList)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:59
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
::pi_kernel PiKernel
Definition: pi.hpp:138
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::vector< bool > KernelArgMask
static constexpr uint32_t ITTSpecConstId
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:33
std::shared_ptr< device_impl > DeviceImplPtr
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
std::vector< unsigned char > SerializedObj
Definition: util.hpp:68
Definition: access.hpp:18
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
C++ wrapper of extern "C" PI interfaces.
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:1093