DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_image_impl.hpp
Go to the documentation of this file.
1 //==------- device_image_impl.hpp - SYCL device_image_impl -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
15 #include <detail/plugin.hpp>
17 #include <sycl/context.hpp>
18 #include <sycl/detail/common.hpp>
19 #include <sycl/detail/pi.h>
20 #include <sycl/detail/pi.hpp>
21 #include <sycl/device.hpp>
22 #include <sycl/kernel_bundle.hpp>
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <cstring>
27 #include <memory>
28 #include <mutex>
29 #include <vector>
30 
31 namespace sycl {
33 namespace detail {
34 
35 template <class T> struct LessByHash {
36  bool operator()(const T &LHS, const T &RHS) const {
37  return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
38  }
39 };
40 
41 // The class is impl counterpart for sycl::device_image
42 // It can represent a program in different states, kernel_id's it has and state
43 // of specialization constants for it
45 public:
46  // The struct maps specialization ID to offset in the binary blob where value
47  // for this spec const should be.
48  struct SpecConstDescT {
49  unsigned int ID = 0;
50  unsigned int CompositeOffset = 0;
51  unsigned int Size = 0;
52  unsigned int BlobOffset = 0;
53  bool IsSet = false;
54  };
55 
56  using SpecConstMapT = std::map<std::string, std::vector<SpecConstDescT>>;
57 
58  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
59  std::vector<device> Devices, bundle_state State,
60  std::shared_ptr<std::vector<kernel_id>> KernelIDs,
61  RT::PiProgram Program)
62  : MBinImage(BinImage), MContext(std::move(Context)),
63  MDevices(std::move(Devices)), MState(State), MProgram(Program),
64  MKernelIDs(std::move(KernelIDs)) {
65  updateSpecConstSymMap();
66  }
67 
68  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
69  std::vector<device> Devices, bundle_state State,
70  std::shared_ptr<std::vector<kernel_id>> KernelIDs,
71  RT::PiProgram Program, const SpecConstMapT &SpecConstMap,
72  const std::vector<unsigned char> &SpecConstsBlob)
73  : MBinImage(BinImage), MContext(std::move(Context)),
74  MDevices(std::move(Devices)), MState(State), MProgram(Program),
75  MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
76  MSpecConstSymMap(SpecConstMap) {}
77 
78  bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
79  return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(),
80  KernelIDCand, LessByHash<kernel_id>{});
81  }
82 
83  bool has_kernel(const kernel_id &KernelIDCand,
84  const device &DeviceCand) const noexcept {
85  for (const device &Device : MDevices)
86  if (Device == DeviceCand)
87  return has_kernel(KernelIDCand);
88 
89  return false;
90  }
91 
92  const std::vector<kernel_id> &get_kernel_ids() const noexcept {
93  return *MKernelIDs;
94  }
95 
96  bool has_specialization_constants() const noexcept {
97  // Lock the mutex to prevent when one thread in the middle of writing a
98  // new value while another thread is reading the value to pass it to
99  // JIT compiler.
100  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
101  return !MSpecConstSymMap.empty();
102  }
103 
104  bool all_specialization_constant_native() const noexcept {
105  assert(false && "Not implemented");
106  return false;
107  }
108 
109  bool has_specialization_constant(const char *SpecName) const noexcept {
110  // Lock the mutex to prevent when one thread in the middle of writing a
111  // new value while another thread is reading the value to pass it to
112  // JIT compiler.
113  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
114  return MSpecConstSymMap.count(SpecName) != 0;
115  }
116 
117  void set_specialization_constant_raw_value(const char *SpecName,
118  const void *Value) noexcept {
119  // Lock the mutex to prevent when one thread in the middle of writing a
120  // new value while another thread is reading the value to pass it to
121  // JIT compiler.
122  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
123 
124  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
125  return;
126 
127  std::vector<SpecConstDescT> &Descs =
128  MSpecConstSymMap[std::string{SpecName}];
129  for (SpecConstDescT &Desc : Descs) {
130  Desc.IsSet = true;
131  std::memcpy(MSpecConstsBlob.data() + Desc.BlobOffset,
132  static_cast<const char *>(Value) + Desc.CompositeOffset,
133  Desc.Size);
134  }
135  }
136 
137  void get_specialization_constant_raw_value(const char *SpecName,
138  void *ValueRet) const noexcept {
139  assert(is_specialization_constant_set(SpecName));
140  // Lock the mutex to prevent when one thread in the middle of writing a
141  // new value while another thread is reading the value to pass it to
142  // JIT compiler.
143  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
144 
145  // operator[] can't be used here, since it's not marked as const
146  const std::vector<SpecConstDescT> &Descs =
147  MSpecConstSymMap.at(std::string{SpecName});
148  for (const SpecConstDescT &Desc : Descs) {
149 
150  std::memcpy(static_cast<char *>(ValueRet) + Desc.CompositeOffset,
151  MSpecConstsBlob.data() + Desc.BlobOffset, Desc.Size);
152  }
153  }
154 
155  bool is_specialization_constant_set(const char *SpecName) const noexcept {
156  // Lock the mutex to prevent when one thread in the middle of writing a
157  // new value while another thread is reading the value to pass it to
158  // JIT compiler.
159  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
160  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
161  return false;
162 
163  const std::vector<SpecConstDescT> &Descs =
164  MSpecConstSymMap.at(std::string{SpecName});
165  return Descs.front().IsSet;
166  }
167 
168  bundle_state get_state() const noexcept { return MState; }
169 
170  void set_state(bundle_state NewState) noexcept { MState = NewState; }
171 
172  const std::vector<device> &get_devices() const noexcept { return MDevices; }
173 
174  bool compatible_with_device(const device &Dev) const {
175  return std::any_of(
176  MDevices.begin(), MDevices.end(),
177  [&Dev](const device &DevCand) { return Dev == DevCand; });
178  }
179 
180  const RT::PiProgram &get_program_ref() const noexcept { return MProgram; }
181 
182  const RTDeviceBinaryImage *&get_bin_image_ref() noexcept { return MBinImage; }
183 
184  const context &get_context() const noexcept { return MContext; }
185 
186  std::shared_ptr<std::vector<kernel_id>> &get_kernel_ids_ptr() noexcept {
187  return MKernelIDs;
188  }
189 
190  std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
191  return MSpecConstsBlob;
192  }
193 
195  std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
196  if (nullptr == MSpecConstsBuffer && !MSpecConstsBlob.empty()) {
197  const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
198  // Uses PI_MEM_FLAGS_HOST_PTR_COPY instead of PI_MEM_FLAGS_HOST_PTR_USE
199  // since post-enqueue cleanup might trigger destruction of
200  // device_image_impl and, as a result, destruction of MSpecConstsBlob
201  // while MSpecConstsBuffer is still in use.
202  // TODO consider changing the lifetime of device_image_impl instead
203  memBufferCreateHelper(Plugin,
204  detail::getSyclObjImpl(MContext)->getHandleRef(),
206  MSpecConstsBlob.size(), MSpecConstsBlob.data(),
207  &MSpecConstsBuffer, nullptr);
208  }
209  return MSpecConstsBuffer;
210  }
211 
212  const SpecConstMapT &get_spec_const_data_ref() const noexcept {
213  return MSpecConstSymMap;
214  }
215 
216  std::mutex &get_spec_const_data_lock() noexcept {
217  return MSpecConstAccessMtx;
218  }
219 
221  assert(MProgram);
222  const auto &ContextImplPtr = detail::getSyclObjImpl(MContext);
223  const plugin &Plugin = ContextImplPtr->getPlugin();
224 
225  pi_native_handle NativeProgram = 0;
227  &NativeProgram);
228 
229  return NativeProgram;
230  }
231 
233 
234  if (MProgram) {
235  const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
236  Plugin.call<PiApiKind::piProgramRelease>(MProgram);
237  }
238  if (MSpecConstsBuffer) {
239  std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
240  const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
241  memReleaseHelper(Plugin, MSpecConstsBuffer);
242  }
243  }
244 
245 private:
246  void updateSpecConstSymMap() {
247  if (MBinImage) {
248  const RTDeviceBinaryImage::PropertyRange &SCRange =
249  MBinImage->getSpecConstants();
251 
252  // get default values for specialization constants
253  const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
254  MBinImage->getSpecConstantsDefaultValues();
255 
256  // This variable is used to calculate spec constant value offset in a
257  // flat byte array.
258  unsigned BlobOffset = 0;
259  for (SCItTy SCIt : SCRange) {
260  const char *SCName = (*SCIt)->Name;
261 
262  ByteArray Descriptors = DeviceBinaryProperty(*SCIt).asByteArray();
263  // First 8 bytes are consumed by the size of the property.
264  Descriptors.dropBytes(8);
265 
266  // Expected layout is vector of 3-component tuples (flattened into a
267  // vector of scalars), where each tuple consists of: ID of a scalar spec
268  // constant, (which might be a member of the composite); offset, which
269  // is used to calculate location of scalar member within the composite
270  // or zero for scalar spec constants; size of a spec constant.
271  unsigned LocalOffset = 0;
272  while (!Descriptors.empty()) {
273  auto [Id, CompositeOffset, Size] =
274  Descriptors.consume<uint32_t, uint32_t, uint32_t>();
275 
276  // Make sure that alignment is correct in the blob.
277  const unsigned OffsetFromLast = CompositeOffset - LocalOffset;
278  BlobOffset += OffsetFromLast;
279  // Composites may have a special padding element at the end which
280  // should not have a descriptor. These padding elements all have max
281  // ID value.
283  // The map is not locked here because updateSpecConstSymMap() is
284  // only supposed to be called from c'tor.
285  MSpecConstSymMap[std::string{SCName}].push_back(
286  SpecConstDescT{Id, CompositeOffset, Size, BlobOffset});
287  }
288  LocalOffset += OffsetFromLast + Size;
289  BlobOffset += Size;
290  }
291  }
292  MSpecConstsBlob.resize(BlobOffset);
293 
294  bool HasDefaultValues = SCDefValRange.begin() != SCDefValRange.end();
295 
296  if (HasDefaultValues) {
297  ByteArray DefValDescriptors =
298  DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
299  assert(DefValDescriptors.size() - 8 == MSpecConstsBlob.size() &&
300  "Specialization constant default value blob do not have the "
301  "expected size.");
302  std::uninitialized_copy(&DefValDescriptors[8],
303  &DefValDescriptors[8] + MSpecConstsBlob.size(),
304  MSpecConstsBlob.data());
305  }
306  }
307  }
308 
309  const RTDeviceBinaryImage *MBinImage = nullptr;
310  context MContext;
311  std::vector<device> MDevices;
312  bundle_state MState;
313  // Native program handler which this device image represents
314  RT::PiProgram MProgram = nullptr;
315  // List of kernel ids available in this image, elements should be sorted
316  // according to LessByNameComp
317  std::shared_ptr<std::vector<kernel_id>> MKernelIDs;
318 
319  // A mutex for sycnhronizing access to spec constants blob. Mutable because
320  // needs to be locked in the const method for getting spec constant value.
321  mutable std::mutex MSpecConstAccessMtx;
322  // Binary blob which can have values of all specialization constants in the
323  // image
324  std::vector<unsigned char> MSpecConstsBlob;
325  // Buffer containing binary blob which can have values of all specialization
326  // constants in the image, it is using for storing non-native specialization
327  // constants
328  RT::PiMem MSpecConstsBuffer = nullptr;
329  // Contains map of spec const names to their descriptions + offsets in
330  // the MSpecConstsBlob
331  std::map<std::string, std::vector<SpecConstDescT>> MSpecConstSymMap;
332 };
333 
334 } // namespace detail
335 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
336 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:39
void dropBytes(std::size_t Bytes)
const std::vector< device > & get_devices() const noexcept
std::shared_ptr< std::vector< kernel_id > > & get_kernel_ids_ptr() noexcept
bool has_kernel(const kernel_id &KernelIDCand) const noexcept
std::vector< unsigned char > & get_spec_const_blob_ref() noexcept
bool has_kernel(const kernel_id &KernelIDCand, const device &DeviceCand) const noexcept
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::shared_ptr< std::vector< kernel_id >> KernelIDs, RT::PiProgram Program)
bool is_specialization_constant_set(const char *SpecName) const noexcept
const SpecConstMapT & get_spec_const_data_ref() const noexcept
void set_specialization_constant_raw_value(const char *SpecName, const void *Value) noexcept
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
const RT::PiProgram & get_program_ref() const noexcept
bool has_specialization_constants() const noexcept
const context & get_context() const noexcept
const std::vector< kernel_id > & get_kernel_ids() const noexcept
std::mutex & get_spec_const_data_lock() noexcept
bundle_state get_state() const noexcept
bool all_specialization_constant_native() const noexcept
bool compatible_with_device(const device &Dev) const
RT::PiMem & get_spec_const_buffer_ref() noexcept
void get_specialization_constant_raw_value(const char *SpecName, void *ValueRet) const noexcept
bool has_specialization_constant(const char *SpecName) const noexcept
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::shared_ptr< std::vector< kernel_id >> KernelIDs, RT::PiProgram Program, const SpecConstMapT &SpecConstMap, const std::vector< unsigned char > &SpecConstsBlob)
const RTDeviceBinaryImage *& get_bin_image_ref() noexcept
void set_state(bundle_state NewState) noexcept
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:47
Objects of the class identify kernel is some kernel_bundle related APIs.
#define __SYCL_INLINE_VER_NAMESPACE(X)
::pi_mem PiMem
Definition: pi.hpp:119
::pi_program PiProgram
Definition: pi.hpp:115
void memBufferCreateHelper(const plugin &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:240
void memcpy(void *Dst, const void *Src, std::size_t Size)
void memReleaseHelper(const plugin &Plugin, pi_mem Mem)
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
uintptr_t pi_native_handle
Definition: pi.h:109
pi_result piProgramRelease(pi_program program)
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY
Definition: pi.h:550
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:546
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
C++ wrapper of extern "C" PI interfaces.
@ Device
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
bool operator()(const T &LHS, const T &RHS) const