DPC++ Runtime
Runtime libraries for oneAPI DPC++
program_manager.hpp
Go to the documentation of this file.
1 //==------ program_manager.hpp --- SYCL program manager---------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
15 #include <sycl/detail/cg_types.hpp>
16 #include <sycl/detail/common.hpp>
18 #include <sycl/detail/export.hpp>
20 #include <sycl/detail/os_util.hpp>
21 #include <sycl/detail/pi.hpp>
22 #include <sycl/detail/util.hpp>
23 #include <sycl/device.hpp>
24 #include <sycl/kernel_bundle.hpp>
25 
26 #include <cstdint>
27 #include <map>
28 #include <memory>
29 #include <set>
30 #include <unordered_map>
31 #include <unordered_set>
32 #include <vector>
33 
34 // +++ Entry points referenced by the offload wrapper object {
35 
38 extern "C" __SYCL_EXPORT void __sycl_register_lib(pi_device_binaries desc);
39 
43 extern "C" __SYCL_EXPORT void __sycl_unregister_lib(pi_device_binaries desc);
44 
45 // +++ }
46 
47 namespace sycl {
48 inline namespace _V1 {
49 class context;
50 namespace detail {
51 
52 bool doesDevSupportDeviceRequirements(const device &Dev,
53  const RTDeviceBinaryImage &BinImages);
54 std::optional<sycl::exception>
55 checkDevSupportDeviceRequirements(const device &Dev,
56  const RTDeviceBinaryImage &BinImages,
57  const NDRDescT &NDRDesc = {});
58 
59 // This value must be the same as in libdevice/device_itt.h.
60 // See sycl/doc/design/ITTAnnotations.md for more info.
61 static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
62 
63 class context_impl;
64 using ContextImplPtr = std::shared_ptr<context_impl>;
65 class device_impl;
66 using DeviceImplPtr = std::shared_ptr<device_impl>;
67 class program_impl;
68 class queue_impl;
69 class event_impl;
70 // DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
71 // If any update is made here, need to sync with DeviceLibExt definition
72 // in llvm/tools/sycl-post-link/sycl-post-link.cpp
73 enum class DeviceLibExt : std::uint32_t {
84 };
85 
86 // Provides single loading and building OpenCL programs with unique contexts
87 // that is necessary for no interoperability cases with lambda.
89 public:
90  // Returns the single instance of the program manager for the entire
91  // process. Can only be called after staticInit is done.
92  static ProgramManager &getInstance();
93 
94  RTDeviceBinaryImage &getDeviceImage(const std::string &KernelName,
95  const context &Context,
96  const device &Device,
97  bool JITCompilationIsRequired = false);
98 
100  const std::unordered_set<RTDeviceBinaryImage *> &ImagesToVerify,
101  const context &Context, const device &Device,
102  bool JITCompilationIsRequired = false);
103 
105  const context &Context,
106  const device &Device);
125  std::pair<sycl::detail::pi::PiProgram, bool>
126  getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
127  const device &Device,
128  const std::string &CompileAndLinkOptions,
129  SerializedObj SpecConsts);
140  getBuiltPIProgram(const ContextImplPtr &ContextImpl,
141  const DeviceImplPtr &DeviceImpl,
142  const std::string &KernelName, const NDRDescT &NDRDesc = {},
143  bool JITCompilationIsRequired = false);
144 
146  getBuiltPIProgram(const context &Context, const device &Device,
147  const std::string &KernelName,
148  const property_list &PropList,
149  bool JITCompilationIsRequired = false);
150 
151  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *,
153  getOrCreateKernel(const ContextImplPtr &ContextImpl,
154  const DeviceImplPtr &DeviceImpl,
155  const std::string &KernelName,
156  const NDRDescT &NDRDesc = {});
157 
160  const ContextImplPtr Context);
161 
162  void addImages(pi_device_binaries DeviceImages);
163  void debugPrintBinaryImages() const;
164  static std::string
166  const ContextImplPtr Context);
167 
179  void flushSpecConstants(const program_impl &Prg,
180  pi::PiProgram NativePrg = nullptr,
181  const RTDeviceBinaryImage *Img = nullptr);
182  uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
183 
188  const KernelArgMask *
190  const std::string &KernelName);
191 
192  // The function returns the unique SYCL kernel identifier associated with a
193  // kernel name.
194  kernel_id getSYCLKernelID(const std::string &KernelName);
195 
196  // The function returns a vector containing all unique SYCL kernel identifiers
197  // in SYCL device images.
198  std::vector<kernel_id> getAllSYCLKernelIDs();
199 
200  // The function returns the unique SYCL kernel identifier associated with a
201  // built-in kernel name.
202  kernel_id getBuiltInKernelID(const std::string &KernelName);
203 
204  // The function inserts or initializes a device_global entry into the
205  // device_global map.
206  void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
207  const char *UniqueId);
208 
209  // Returns true if any available image is compatible with the device Dev.
210  bool hasCompatibleImage(const device &Dev);
211 
212  // The function gets a device_global entry identified by the pointer to the
213  // device_global object from the device_global map.
214  DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr);
215 
216  // The function gets multiple device_global entries identified by their unique
217  // IDs from the device_global map.
218  std::vector<DeviceGlobalMapEntry *>
219  getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
220  bool ExcludeDeviceImageScopeDecorated = false);
221  // The function inserts or initializes a host_pipe entry into the
222  // host_pipe map.
223  void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId);
224 
225  // The function gets a host_pipe entry identified by the unique ID from
226  // the host_pipe map.
227  HostPipeMapEntry *getHostPipeEntry(const std::string &UniqueId);
228 
229  // The function gets a host_pipe entry identified by the pointer to the
230  // host_pipe object from the host_pipe map.
231  HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr);
232 
235  const context &Ctx, const device &Dev);
236 
237  // The function returns a vector of SYCL device images that are compiled with
238  // the required state and at least one device from the passed list of devices.
239  std::vector<device_image_plain> getSYCLDeviceImagesWithCompatibleState(
240  const context &Ctx, const std::vector<device> &Devs,
241  bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
242 
243  // Brind images in the passed vector to the required state. Does it inplace
244  void
245  bringSYCLDeviceImagesToState(std::vector<device_image_plain> &DeviceImages,
246  bundle_state TargetState);
247 
248  // The function returns a vector of SYCL device images in required state,
249  // which are compatible with at least one of the device from Devs.
250  std::vector<device_image_plain>
251  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
252  bundle_state State);
253 
254  // The function returns a vector of SYCL device images, for which Selector
255  // callable returns true, in required state, which are compatible with at
256  // least one of the device from Devs.
257  std::vector<device_image_plain>
258  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
259  const DevImgSelectorImpl &Selector,
260  bundle_state TargetState);
261 
262  // The function returns a vector of SYCL device images which represent at
263  // least one kernel from kernel ids vector in required state, which are
264  // compatible with at least one of the device from Devs.
265  std::vector<device_image_plain>
266  getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
267  const std::vector<kernel_id> &KernelIDs,
268  bundle_state TargetState);
269 
270  // Produces new device image by convering input device image to the object
271  // state
272  device_image_plain compile(const device_image_plain &DeviceImage,
273  const std::vector<device> &Devs,
274  const property_list &PropList);
275 
276  // Produces set of device images by convering input device images to object
277  // the executable state
278  std::vector<device_image_plain> link(const device_image_plain &DeviceImages,
279  const std::vector<device> &Devs,
280  const property_list &PropList);
281 
282  // Produces new device image by converting input device image to the
283  // executable state
284  device_image_plain build(const device_image_plain &DeviceImage,
285  const std::vector<device> &Devs,
286  const property_list &PropList);
287 
288  std::tuple<sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *>
289  getOrCreateKernel(const context &Context, const std::string &KernelName,
290  const property_list &PropList,
292 
293  ProgramManager();
294  ~ProgramManager() = default;
295 
296  bool kernelUsesAssert(const std::string &KernelName) const;
297 
298  bool kernelUsesAsan() const { return m_AsanFoundInImage; }
299 
300  std::set<RTDeviceBinaryImage *>
301  getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
302 
303 private:
304  ProgramManager(ProgramManager const &) = delete;
305  ProgramManager &operator=(ProgramManager const &) = delete;
306 
307  using ProgramPtr =
308  std::unique_ptr<remove_pointer_t<sycl::detail::pi::PiProgram>,
309  decltype(&::piProgramRelease)>;
310  ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context,
311  const std::string &CompileOptions,
312  const std::string &LinkOptions,
313  const sycl::detail::pi::PiDevice &Device,
314  uint32_t DeviceLibReqMask);
316  void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
317 
319  void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img);
320 
323  using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
324 
329  //
330  std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
331 
332  // Maps KernelIDs to device binary images. There can be more than one image
333  // in case of SPIRV + AOT.
334  // Using shared_ptr to avoid expensive copy of the vector.
336  std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
337  m_KernelIDs2BinImage;
338 
339  // Maps device binary image to a vector of kernel ids in this image.
340  // Using shared_ptr to avoid expensive copy of the vector.
341  // The vector is initialized in addImages function and is supposed to be
342  // immutable afterwards.
344  std::unordered_map<RTDeviceBinaryImage *,
345  std::shared_ptr<std::vector<kernel_id>>>
346  m_BinImg2KernelIDs;
347 
352  std::mutex m_KernelIDsMutex;
353 
360  std::unordered_multimap<std::string, RTDeviceBinaryImage *> m_ServiceKernels;
361 
363  // from kernel bundles.
365  std::unordered_set<std::string> m_ExportedSymbols;
366 
369  std::unordered_set<RTDeviceBinaryImageUPtr> m_DeviceImages;
370 
373  std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
374 
376  std::mutex m_BuiltInKernelIDsMutex;
377 
378  // Keeps track of pi_program to image correspondence. Needed for:
379  // - knowing which specialization constants are used in the program and
380  // injecting their current values before compiling the SPIR-V; the binary
381  // image object has info about all spec constants used in the module
382  // - finding kernel argument masks for kernels associated with each
383  // pi_program
384  // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
385  // referenced from outside SYCL runtime and RTDeviceBinaryImage object
386  // lifetime matches program manager's one.
387  // NOTE: keys in the map can be invalid (reference count went to zero and
388  // the underlying program disposed of), so the map can't be used in any way
389  // other than binary image lookup with known live PiProgram as the key.
390  // NOTE: access is synchronized via the MNativeProgramsMutex
391  std::unordered_map<pi::PiProgram, const RTDeviceBinaryImage *> NativePrograms;
392 
394  std::mutex MNativeProgramsMutex;
395 
396  using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
399  std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
400  m_EliminatedKernelArgMasks;
401 
403  bool m_UseSpvFile = false;
404  RTDeviceBinaryImageUPtr m_SpvFileImage;
405 
406  std::set<std::string> m_KernelUsesAssert;
407 
408  // True iff there is a device image compiled with AddressSanitizer
409  bool m_AsanFoundInImage;
410 
411  // Maps between device_global identifiers and associated information.
412  std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
413  m_DeviceGlobals;
414  std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
415 
417  std::mutex m_DeviceGlobalsMutex;
418 
419  // Maps between host_pipe identifiers and associated information.
420  std::unordered_map<std::string, std::unique_ptr<HostPipeMapEntry>>
421  m_HostPipes;
422  std::unordered_map<const void *, HostPipeMapEntry *> m_Ptr2HostPipe;
423 
425  std::mutex m_HostPipesMutex;
426 };
427 } // namespace detail
428 } // namespace _V1
429 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
std::set< RTDeviceBinaryImage * > getRawDeviceImages(const std::vector< kernel_id > &KernelIDs)
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img)
kernel_id getBuiltInKernelID(const std::string &KernelName)
sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device)
void addImages(pi_device_binaries DeviceImages)
std::vector< device_image_plain > getSYCLDeviceImages(const context &Ctx, const std::vector< device > &Devs, bundle_state State)
static ProgramManager & getInstance()
std::vector< device_image_plain > getSYCLDeviceImagesWithCompatibleState(const context &Ctx, const std::vector< device > &Devs, bundle_state TargetState, const std::vector< kernel_id > &KernelIDs={})
const KernelArgMask * getEliminatedKernelArgMask(pi::PiProgram NativePrg, const std::string &KernelName)
Returns the mask for eliminated kernel arguments for the requested kernel within the native program.
sycl::detail::pi::PiProgram getBuiltPIProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={}, bool JITCompilationIsRequired=false)
Builds or retrieves from cache a program defining the kernel with given name.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId)
kernel_id getSYCLKernelID(const std::string &KernelName)
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
device_image_plain getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev)
RTDeviceBinaryImage & getDeviceImage(const std::string &KernelName, const context &Context, const device &Device, bool JITCompilationIsRequired=false)
sycl::detail::pi::PiProgram getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, const ContextImplPtr Context)
bool kernelUsesAssert(const std::string &KernelName) const
std::pair< sycl::detail::pi::PiProgram, bool > getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts)
Creates a PI program using either a cached device code binary if present in the persistent cache or f...
static std::string getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, const ContextImplPtr Context)
void bringSYCLDeviceImagesToState(std::vector< device_image_plain > &DeviceImages, bundle_state TargetState)
HostPipeMapEntry * getHostPipeEntry(const std::string &UniqueId)
device_image_plain build(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, const char *UniqueId)
void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg=nullptr, const RTDeviceBinaryImage *Img=nullptr)
Resolves given program to a device binary image and requests the program to flush constants the image...
device_image_plain compile(const device_image_plain &DeviceImage, const std::vector< device > &Devs, const property_list &PropList)
sycl::detail::pi::PiProgram getBuiltPIProgram(const context &Context, const device &Device, const std::string &KernelName, const property_list &PropList, bool JITCompilationIsRequired=false)
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc={})
bool hasCompatibleImage(const device &Dev)
std::vector< device_image_plain > link(const device_image_plain &DeviceImages, const std::vector< device > &Devs, const property_list &PropList)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::vector< kernel_id > getAllSYCLKernelIDs()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
Objects of the class identify kernel is some kernel_bundle related APIs.
Objects of the property_list class are containers for the SYCL properties.
::pi_kernel PiKernel
Definition: pi.hpp:138
std::optional< sycl::exception > checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc)
std::function< bool(const detail::DeviceImageImplPtr &DevImgImpl)> DevImgSelectorImpl
std::vector< bool > KernelArgMask
static constexpr uint32_t ITTSpecConstId
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
std::shared_ptr< device_impl > DeviceImplPtr
bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img)
std::vector< unsigned char > SerializedObj
Definition: util.hpp:69
Definition: access.hpp:18
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
C++ wrapper of extern "C" PI interfaces.
void __sycl_register_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static initialization.
void __sycl_unregister_lib(pi_device_binaries desc)
Executed as a part of current module's (.exe, .dll) static de-initialization.
This struct is a record of all the device code that may be offloaded.
Definition: pi.h:1122