DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_image_impl.hpp
Go to the documentation of this file.
1 //==------- device_image_impl.hpp - SYCL device_image_impl -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
15 #include <detail/plugin.hpp>
17 #include <sycl/context.hpp>
18 #include <sycl/detail/common.hpp>
19 #include <sycl/detail/pi.h>
20 #include <sycl/detail/pi.hpp>
21 #include <sycl/device.hpp>
22 #include <sycl/kernel_bundle.hpp>
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <cstring>
27 #include <memory>
28 #include <mutex>
29 #include <vector>
30 
31 namespace sycl {
32 inline namespace _V1 {
33 namespace detail {
34 
35 template <class T> struct LessByHash {
36  bool operator()(const T &LHS, const T &RHS) const {
37  return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
38  }
39 };
40 
41 // The class is impl counterpart for sycl::device_image
42 // It can represent a program in different states, kernel_id's it has and state
43 // of specialization constants for it
45 public:
46  // The struct maps specialization ID to offset in the binary blob where value
47  // for this spec const should be.
48  struct SpecConstDescT {
49  unsigned int ID = 0;
50  unsigned int CompositeOffset = 0;
51  unsigned int Size = 0;
52  unsigned int BlobOffset = 0;
53  // Indicates if the specialization constant was set to a value which is
54  // different from the default value.
55  bool IsSet = false;
56  };
57 
58  using SpecConstMapT = std::map<std::string, std::vector<SpecConstDescT>>;
59 
60  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
61  std::vector<device> Devices, bundle_state State,
62  std::shared_ptr<std::vector<kernel_id>> KernelIDs,
64  : MBinImage(BinImage), MContext(std::move(Context)),
65  MDevices(std::move(Devices)), MState(State), MProgram(Program),
66  MKernelIDs(std::move(KernelIDs)),
67  MSpecConstsDefValBlob(getSpecConstsDefValBlob()) {
68  updateSpecConstSymMap();
69  }
70 
71  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
72  std::vector<device> Devices, bundle_state State,
73  std::shared_ptr<std::vector<kernel_id>> KernelIDs,
75  const SpecConstMapT &SpecConstMap,
76  const std::vector<unsigned char> &SpecConstsBlob)
77  : MBinImage(BinImage), MContext(std::move(Context)),
78  MDevices(std::move(Devices)), MState(State), MProgram(Program),
79  MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
80  MSpecConstsDefValBlob(getSpecConstsDefValBlob()),
81  MSpecConstSymMap(SpecConstMap) {}
82 
83  bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
84  return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(),
85  KernelIDCand, LessByHash<kernel_id>{});
86  }
87 
88  bool has_kernel(const kernel_id &KernelIDCand,
89  const device &DeviceCand) const noexcept {
90  // If the device is in the device list and the kernel ID is in the kernel
91  // bundle, return true.
92  for (const device &Device : MDevices)
93  if (Device == DeviceCand)
94  return has_kernel(KernelIDCand);
95 
96  // Otherwise, if the device candidate is a sub-device it is also valid if
97  // its parent is valid.
98  if (!getSyclObjImpl(DeviceCand)->isRootDevice())
99  return has_kernel(KernelIDCand,
100  DeviceCand.get_info<info::device::parent_device>());
101 
102  return false;
103  }
104 
105  const std::vector<kernel_id> &get_kernel_ids() const noexcept {
106  return *MKernelIDs;
107  }
108 
110  // Lock the mutex to prevent when one thread in the middle of writing a
111  // new value while another thread is reading the value to pass it to
112  // JIT compiler.
113  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
114  return !MSpecConstSymMap.empty();
115  }
116 
118  // Specialization constants are natively supported in JIT mode on backends,
119  // that are using SPIR-V as IR
120 
121  // Not sure if it's possible currently, but probably it may happen if the
122  // kernel bundle is created with interop function. Now the only one such
123  // function is make_kernel(), but I'm not sure if it's even possible to
124  // use spec constant with such kernel. So, in such case we need to check
125  // if it's JIT or no somehow.
126  assert(MBinImage &&
127  "native_specialization_constant() called for unimplemented case");
128 
129  auto IsJITSPIRVTarget = [](const char *Target) {
130  return (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0 ||
131  strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32) == 0);
132  };
133  return (MContext.get_backend() == backend::opencl ||
135  IsJITSPIRVTarget(MBinImage->getRawData().DeviceTargetSpec);
136  }
137 
138  bool has_specialization_constant(const char *SpecName) const noexcept {
139  // Lock the mutex to prevent when one thread in the middle of writing a
140  // new value while another thread is reading the value to pass it to
141  // JIT compiler.
142  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
143  return MSpecConstSymMap.count(SpecName) != 0;
144  }
145 
146  void set_specialization_constant_raw_value(const char *SpecName,
147  const void *Value) noexcept {
148  // Lock the mutex to prevent when one thread in the middle of writing a
149  // new value while another thread is reading the value to pass it to
150  // JIT compiler.
151  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
152 
153  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
154  return;
155 
156  std::vector<SpecConstDescT> &Descs =
157  MSpecConstSymMap[std::string{SpecName}];
158  for (SpecConstDescT &Desc : Descs) {
159  // If there is a default value of the specialization constant and it is
160  // the same as the value which is being set then do nothing, runtime is
161  // going to handle this case just like if only the default value of the
162  // specialization constant was provided.
163  if (MSpecConstsDefValBlob.size() &&
164  (std::memcmp(MSpecConstsDefValBlob.begin() + Desc.BlobOffset,
165  static_cast<const char *>(Value) + Desc.CompositeOffset,
166  Desc.Size) == 0)) {
167  // Now we have default value, so reset to false.
168  Desc.IsSet = false;
169  continue;
170  }
171 
172  // Value of the specialization constant is set to a value which is
173  // different from the default value.
174  Desc.IsSet = true;
175  std::memcpy(MSpecConstsBlob.data() + Desc.BlobOffset,
176  static_cast<const char *>(Value) + Desc.CompositeOffset,
177  Desc.Size);
178  }
179  }
180 
181  void get_specialization_constant_raw_value(const char *SpecName,
182  void *ValueRet) const noexcept {
183  bool IsSet = is_specialization_constant_set(SpecName);
184  // Lock the mutex to prevent when one thread in the middle of writing a
185  // new value while another thread is reading the value to pass it to
186  // JIT compiler.
187  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
188  assert(IsSet || MSpecConstsDefValBlob.size());
189  // operator[] can't be used here, since it's not marked as const
190  const std::vector<SpecConstDescT> &Descs =
191  MSpecConstSymMap.at(std::string{SpecName});
192  for (const SpecConstDescT &Desc : Descs) {
193  auto Blob =
194  IsSet ? MSpecConstsBlob.data() : MSpecConstsDefValBlob.begin();
195  std::memcpy(static_cast<char *>(ValueRet) + Desc.CompositeOffset,
196  Blob + Desc.BlobOffset, Desc.Size);
197  }
198  }
199 
200  bool is_specialization_constant_set(const char *SpecName) const noexcept {
201  // Lock the mutex to prevent when one thread in the middle of writing a
202  // new value while another thread is reading the value to pass it to
203  // JIT compiler.
204  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
205  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
206  return false;
207 
208  const std::vector<SpecConstDescT> &Descs =
209  MSpecConstSymMap.at(std::string{SpecName});
210  return Descs.front().IsSet;
211  }
212 
214  // Lock the mutex to prevent when one thread in the middle of writing a
215  // new value while another thread is reading the value to pass it to
216  // JIT compiler.
217  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
218  for (auto &SpecConst : MSpecConstSymMap) {
219  for (auto &Desc : SpecConst.second) {
220  if (Desc.IsSet)
221  return true;
222  }
223  }
224 
225  return false;
226  }
227 
230  MBinImage->getProperty("specConstsReplacedWithDefault");
231  return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0);
232  }
233 
234  bundle_state get_state() const noexcept { return MState; }
235 
236  void set_state(bundle_state NewState) noexcept { MState = NewState; }
237 
238  const std::vector<device> &get_devices() const noexcept { return MDevices; }
239 
240  bool compatible_with_device(const device &Dev) const {
241  return std::any_of(
242  MDevices.begin(), MDevices.end(),
243  [&Dev](const device &DevCand) { return Dev == DevCand; });
244  }
245 
247  return MProgram;
248  }
249 
250  const RTDeviceBinaryImage *&get_bin_image_ref() noexcept { return MBinImage; }
251 
252  const context &get_context() const noexcept { return MContext; }
253 
254  std::shared_ptr<std::vector<kernel_id>> &get_kernel_ids_ptr() noexcept {
255  return MKernelIDs;
256  }
257 
258  std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
259  return MSpecConstsBlob;
260  }
261 
263  std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
264  if (nullptr == MSpecConstsBuffer && !MSpecConstsBlob.empty()) {
265  const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin();
266  // Uses PI_MEM_FLAGS_HOST_PTR_COPY instead of PI_MEM_FLAGS_HOST_PTR_USE
267  // since post-enqueue cleanup might trigger destruction of
268  // device_image_impl and, as a result, destruction of MSpecConstsBlob
269  // while MSpecConstsBuffer is still in use.
270  // TODO consider changing the lifetime of device_image_impl instead
271  memBufferCreateHelper(Plugin,
272  detail::getSyclObjImpl(MContext)->getHandleRef(),
274  MSpecConstsBlob.size(), MSpecConstsBlob.data(),
275  &MSpecConstsBuffer, nullptr);
276  }
277  return MSpecConstsBuffer;
278  }
279 
281  return MSpecConstSymMap;
282  }
283 
285  return MSpecConstAccessMtx;
286  }
287 
289  assert(MProgram);
290  const auto &ContextImplPtr = detail::getSyclObjImpl(MContext);
291  const PluginPtr &Plugin = ContextImplPtr->getPlugin();
292 
293  if (ContextImplPtr->getBackend() == backend::opencl)
294  Plugin->call<PiApiKind::piProgramRetain>(MProgram);
295  pi_native_handle NativeProgram = 0;
296  Plugin->call<PiApiKind::piextProgramGetNativeHandle>(MProgram,
297  &NativeProgram);
298 
299  return NativeProgram;
300  }
301 
303 
304  if (MProgram) {
305  const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin();
306  Plugin->call<PiApiKind::piProgramRelease>(MProgram);
307  }
308  if (MSpecConstsBuffer) {
309  std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
310  const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin();
311  memReleaseHelper(Plugin, MSpecConstsBuffer);
312  }
313  }
314 
315 private:
316  // Get the specialization constant default value blob.
317  ByteArray getSpecConstsDefValBlob() const {
318  if (!MBinImage)
319  return ByteArray(nullptr, 0);
320 
321  // Get default values for specialization constants.
322  const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
323  MBinImage->getSpecConstantsDefaultValues();
324  if (!SCDefValRange.size())
325  return ByteArray(nullptr, 0);
326 
327  ByteArray DefValDescriptors =
328  DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
329  // First 8 bytes are consumed by the size of the property.
330  DefValDescriptors.dropBytes(8);
331  return DefValDescriptors;
332  }
333 
334  void updateSpecConstSymMap() {
335  if (MBinImage) {
336  const RTDeviceBinaryImage::PropertyRange &SCRange =
337  MBinImage->getSpecConstants();
339 
340  // This variable is used to calculate spec constant value offset in a
341  // flat byte array.
342  unsigned BlobOffset = 0;
343  for (SCItTy SCIt : SCRange) {
344  const char *SCName = (*SCIt)->Name;
345 
346  ByteArray Descriptors = DeviceBinaryProperty(*SCIt).asByteArray();
347  // First 8 bytes are consumed by the size of the property.
348  Descriptors.dropBytes(8);
349 
350  // Expected layout is vector of 3-component tuples (flattened into a
351  // vector of scalars), where each tuple consists of: ID of a scalar spec
352  // constant, (which might be a member of the composite); offset, which
353  // is used to calculate location of scalar member within the composite
354  // or zero for scalar spec constants; size of a spec constant.
355  unsigned LocalOffset = 0;
356  while (!Descriptors.empty()) {
357  auto [Id, CompositeOffset, Size] =
358  Descriptors.consume<uint32_t, uint32_t, uint32_t>();
359 
360  // Make sure that alignment is correct in the blob.
361  const unsigned OffsetFromLast = CompositeOffset - LocalOffset;
362  BlobOffset += OffsetFromLast;
363  // Composites may have a special padding element at the end which
364  // should not have a descriptor. These padding elements all have max
365  // ID value.
367  // The map is not locked here because updateSpecConstSymMap() is
368  // only supposed to be called from c'tor.
369  MSpecConstSymMap[std::string{SCName}].push_back(
370  SpecConstDescT{Id, CompositeOffset, Size, BlobOffset});
371  }
372  LocalOffset += OffsetFromLast + Size;
373  BlobOffset += Size;
374  }
375  }
376  MSpecConstsBlob.resize(BlobOffset);
377 
378  if (MSpecConstsDefValBlob.size()) {
379  assert(MSpecConstsDefValBlob.size() == MSpecConstsBlob.size() &&
380  "Specialization constant default value blob do not have the "
381  "expected size.");
382  std::uninitialized_copy(MSpecConstsDefValBlob.begin(),
383  MSpecConstsDefValBlob.begin() +
384  MSpecConstsBlob.size(),
385  MSpecConstsBlob.data());
386  }
387  }
388  }
389 
390  const RTDeviceBinaryImage *MBinImage = nullptr;
391  context MContext;
392  std::vector<device> MDevices;
393  bundle_state MState;
394  // Native program handler which this device image represents
395  sycl::detail::pi::PiProgram MProgram = nullptr;
396  // List of kernel ids available in this image, elements should be sorted
397  // according to LessByNameComp
398  std::shared_ptr<std::vector<kernel_id>> MKernelIDs;
399 
400  // A mutex for sycnhronizing access to spec constants blob. Mutable because
401  // needs to be locked in the const method for getting spec constant value.
402  mutable std::mutex MSpecConstAccessMtx;
403  // Binary blob which can have values of all specialization constants in the
404  // image
405  std::vector<unsigned char> MSpecConstsBlob;
406  // Binary blob which can have default values of all specialization constants
407  // in the image.
408  const ByteArray MSpecConstsDefValBlob;
409  // Buffer containing binary blob which can have values of all specialization
410  // constants in the image, it is using for storing non-native specialization
411  // constants
412  sycl::detail::pi::PiMem MSpecConstsBuffer = nullptr;
413  // Contains map of spec const names to their descriptions + offsets in
414  // the MSpecConstsBlob
415  std::map<std::string, std::vector<SpecConstDescT>> MSpecConstSymMap;
416 };
417 
418 } // namespace detail
419 } // namespace _V1
420 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
backend get_backend() const noexcept
Returns the backend associated with this context.
Definition: context.cpp:132
void dropBytes(std::size_t Bytes)
const pi_device_binary_struct & getRawData() const
pi_device_binary_property getProperty(const char *PropName) const
Returns a single property from SYCL_MISC_PROP category.
const PropertyRange & getSpecConstants() const
Gets the iterator range over specialization constants in this binary image.
const PropertyRange & getSpecConstantsDefaultValues() const
const std::vector< device > & get_devices() const noexcept
std::shared_ptr< std::vector< kernel_id > > & get_kernel_ids_ptr() noexcept
bool has_kernel(const kernel_id &KernelIDCand) const noexcept
std::vector< unsigned char > & get_spec_const_blob_ref() noexcept
bool has_kernel(const kernel_id &KernelIDCand, const device &DeviceCand) const noexcept
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::shared_ptr< std::vector< kernel_id >> KernelIDs, sycl::detail::pi::PiProgram Program, const SpecConstMapT &SpecConstMap, const std::vector< unsigned char > &SpecConstsBlob)
bool is_specialization_constant_set(const char *SpecName) const noexcept
const SpecConstMapT & get_spec_const_data_ref() const noexcept
void set_specialization_constant_raw_value(const char *SpecName, const void *Value) noexcept
sycl::detail::pi::PiMem & get_spec_const_buffer_ref() noexcept
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
bool specialization_constants_replaced_with_default() const noexcept
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::shared_ptr< std::vector< kernel_id >> KernelIDs, sycl::detail::pi::PiProgram Program)
bool has_specialization_constants() const noexcept
const context & get_context() const noexcept
const std::vector< kernel_id > & get_kernel_ids() const noexcept
std::mutex & get_spec_const_data_lock() noexcept
bundle_state get_state() const noexcept
bool all_specialization_constant_native() const noexcept
bool compatible_with_device(const device &Dev) const
const sycl::detail::pi::PiProgram & get_program_ref() const noexcept
void get_specialization_constant_raw_value(const char *SpecName, void *ValueRet) const noexcept
bool has_specialization_constant(const char *SpecName) const noexcept
bool is_any_specialization_constant_set() const noexcept
const RTDeviceBinaryImage *& get_bin_image_ref() noexcept
void set_state(bundle_state NewState) noexcept
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:66
Objects of the class identify kernel is some kernel_bundle related APIs.
void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, pi_mem_flags Flags, size_t Size, void *HostPtr, pi_mem *RetMem, const pi_mem_properties *Props=nullptr)
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem)
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
std::string string
Definition: handler.hpp:426
Definition: access.hpp:18
uintptr_t pi_native_handle
Definition: pi.h:209
pi_result piProgramRetain(pi_program program)
Definition: pi_cuda.cpp:320
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY
Definition: pi.h:744
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64
SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device.
Definition: pi.h:946
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:740
#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32
SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device.
Definition: pi.h:944
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_cuda.cpp:328
C++ wrapper of extern "C" PI interfaces.
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
const char * DeviceTargetSpec
null-terminated string representation of the device's target architecture which holds one of: __SYCL_...
Definition: pi.h:1025
bool operator()(const T &LHS, const T &RHS) const