DPC++ Runtime
Runtime libraries for oneAPI Data Parallel C++
device_image_impl.hpp
Go to the documentation of this file.
1 //==------- device_image_impl.hpp - SYCL device_image_impl -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/sycl/context.hpp>
13 #include <CL/sycl/detail/pi.h>
14 #include <CL/sycl/detail/pi.hpp>
15 #include <CL/sycl/device.hpp>
17 #include <detail/context_impl.hpp>
18 #include <detail/device_impl.hpp>
20 #include <detail/plugin.hpp>
22 
23 #include <algorithm>
24 #include <cassert>
25 #include <cstring>
26 #include <memory>
27 #include <mutex>
28 #include <vector>
29 
31 namespace sycl {
32 namespace detail {
33 
34 // The class is impl counterpart for sycl::device_image
35 // It can represent a program in different states, kernel_id's it has and state
36 // of specialization constants for it
38 public:
39  // The struct maps specialization ID to offset in the binary blob where value
40  // for this spec const should be.
41  struct SpecConstDescT {
42  unsigned int ID = 0;
43  unsigned int CompositeOffset = 0;
44  unsigned int Size = 0;
45  unsigned int BlobOffset = 0;
46  bool IsSet = false;
47  };
48 
49  using SpecConstMapT = std::map<std::string, std::vector<SpecConstDescT>>;
50 
51  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
52  std::vector<device> Devices, bundle_state State,
53  std::vector<kernel_id> KernelIDs, RT::PiProgram Program)
54  : MBinImage(BinImage), MContext(std::move(Context)),
55  MDevices(std::move(Devices)), MState(State), MProgram(Program),
56  MKernelIDs(std::move(KernelIDs)) {
57  updateSpecConstSymMap();
58  }
59 
60  device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
61  std::vector<device> Devices, bundle_state State,
62  std::vector<kernel_id> KernelIDs, RT::PiProgram Program,
63  const SpecConstMapT &SpecConstMap,
64  const std::vector<unsigned char> &SpecConstsBlob)
65  : MBinImage(BinImage), MContext(std::move(Context)),
66  MDevices(std::move(Devices)), MState(State), MProgram(Program),
67  MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
68  MSpecConstSymMap(SpecConstMap) {}
69 
70  bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
71  return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(),
72  KernelIDCand, LessByNameComp{});
73  }
74 
75  bool has_kernel(const kernel_id &KernelIDCand,
76  const device &DeviceCand) const noexcept {
77  for (const device &Device : MDevices)
78  if (Device == DeviceCand)
79  return has_kernel(KernelIDCand);
80 
81  return false;
82  }
83 
84  const std::vector<kernel_id> &get_kernel_ids() const noexcept {
85  return MKernelIDs;
86  }
87 
88  bool has_specialization_constants() const noexcept {
89  // Lock the mutex to prevent when one thread in the middle of writing a
90  // new value while another thread is reading the value to pass it to
91  // JIT compiler.
92  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
93  return !MSpecConstSymMap.empty();
94  }
95 
96  bool all_specialization_constant_native() const noexcept {
97  assert(false && "Not implemented");
98  return false;
99  }
100 
101  bool has_specialization_constant(const char *SpecName) const noexcept {
102  // Lock the mutex to prevent when one thread in the middle of writing a
103  // new value while another thread is reading the value to pass it to
104  // JIT compiler.
105  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
106  return MSpecConstSymMap.count(SpecName) != 0;
107  }
108 
109  void set_specialization_constant_raw_value(const char *SpecName,
110  const void *Value) noexcept {
111  // Lock the mutex to prevent when one thread in the middle of writing a
112  // new value while another thread is reading the value to pass it to
113  // JIT compiler.
114  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
115 
116  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
117  return;
118 
119  std::vector<SpecConstDescT> &Descs =
120  MSpecConstSymMap[std::string{SpecName}];
121  for (SpecConstDescT &Desc : Descs) {
122  Desc.IsSet = true;
123  std::memcpy(MSpecConstsBlob.data() + Desc.BlobOffset,
124  static_cast<const char *>(Value) + Desc.CompositeOffset,
125  Desc.Size);
126  }
127  }
128 
129  void get_specialization_constant_raw_value(const char *SpecName,
130  void *ValueRet) const noexcept {
131  assert(is_specialization_constant_set(SpecName));
132  // Lock the mutex to prevent when one thread in the middle of writing a
133  // new value while another thread is reading the value to pass it to
134  // JIT compiler.
135  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
136 
137  // operator[] can't be used here, since it's not marked as const
138  const std::vector<SpecConstDescT> &Descs =
139  MSpecConstSymMap.at(std::string{SpecName});
140  for (const SpecConstDescT &Desc : Descs) {
141 
142  std::memcpy(static_cast<char *>(ValueRet) + Desc.CompositeOffset,
143  MSpecConstsBlob.data() + Desc.BlobOffset, Desc.Size);
144  }
145  }
146 
147  bool is_specialization_constant_set(const char *SpecName) const noexcept {
148  // Lock the mutex to prevent when one thread in the middle of writing a
149  // new value while another thread is reading the value to pass it to
150  // JIT compiler.
151  const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
152  if (MSpecConstSymMap.count(std::string{SpecName}) == 0)
153  return false;
154 
155  const std::vector<SpecConstDescT> &Descs =
156  MSpecConstSymMap.at(std::string{SpecName});
157  return Descs.front().IsSet;
158  }
159 
160  bundle_state get_state() const noexcept { return MState; }
161 
162  void set_state(bundle_state NewState) noexcept { MState = NewState; }
163 
164  const std::vector<device> &get_devices() const noexcept { return MDevices; }
165 
166  bool compatible_with_device(const device &Dev) const {
167  return std::any_of(
168  MDevices.begin(), MDevices.end(),
169  [&Dev](const device &DevCand) { return Dev == DevCand; });
170  }
171 
172  const RT::PiProgram &get_program_ref() const noexcept { return MProgram; }
173 
174  const RTDeviceBinaryImage *&get_bin_image_ref() noexcept { return MBinImage; }
175 
176  const context &get_context() const noexcept { return MContext; }
177 
178  std::vector<kernel_id> &get_kernel_ids_ref() noexcept { return MKernelIDs; }
179 
180  std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
181  return MSpecConstsBlob;
182  }
183 
185  std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
186  if (nullptr == MSpecConstsBuffer && !MSpecConstsBlob.empty()) {
187  const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
189  detail::getSyclObjImpl(MContext)->getHandleRef(),
191  MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer,
192  nullptr);
193  }
194  return MSpecConstsBuffer;
195  }
196 
197  const SpecConstMapT &get_spec_const_data_ref() const noexcept {
198  return MSpecConstSymMap;
199  }
200 
201  std::mutex &get_spec_const_data_lock() noexcept {
202  return MSpecConstAccessMtx;
203  }
204 
206  assert(MProgram);
207  const auto &ContextImplPtr = detail::getSyclObjImpl(MContext);
208  const plugin &Plugin = ContextImplPtr->getPlugin();
209 
210  pi_native_handle NativeProgram = 0;
212  &NativeProgram);
213 
214  return NativeProgram;
215  }
216 
218 
219  if (MProgram) {
220  const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
221  Plugin.call<PiApiKind::piProgramRelease>(MProgram);
222  }
223  }
224 
225 private:
226  void updateSpecConstSymMap() {
227  if (MBinImage) {
228  const pi::DeviceBinaryImage::PropertyRange &SCRange =
229  MBinImage->getSpecConstants();
231 
232  // get default values for specialization constants
233  const pi::DeviceBinaryImage::PropertyRange &SCDefValRange =
234  MBinImage->getSpecConstantsDefaultValues();
235 
236  // This variable is used to calculate spec constant value offset in a
237  // flat byte array.
238  unsigned BlobOffset = 0;
239  for (SCItTy SCIt : SCRange) {
240  const char *SCName = (*SCIt)->Name;
241 
242  pi::ByteArray Descriptors =
244  assert(Descriptors.size() > 8 && "Unexpected property size");
245 
246  // Expected layout is vector of 3-component tuples (flattened into a
247  // vector of scalars), where each tuple consists of: ID of a scalar spec
248  // constant, (which might be a member of the composite); offset, which
249  // is used to calculate location of scalar member within the composite
250  // or zero for scalar spec constants; size of a spec constant
251  constexpr size_t NumElements = 3;
252  assert(((Descriptors.size() - 8) / sizeof(std::uint32_t)) %
253  NumElements ==
254  0 &&
255  "unexpected layout of composite spec const descriptors");
256  auto *It = reinterpret_cast<const std::uint32_t *>(&Descriptors[8]);
257  auto *End = reinterpret_cast<const std::uint32_t *>(&Descriptors[0] +
258  Descriptors.size());
259  unsigned PrevOffset = 0;
260  while (It != End) {
261  // Make sure that alignment is correct in blob.
262  BlobOffset += /*Offset*/ It[1] - PrevOffset;
263  PrevOffset = It[1];
264  // The map is not locked here because updateSpecConstSymMap() is only
265  // supposed to be called from c'tor.
266  MSpecConstSymMap[std::string{SCName}].push_back(
267  SpecConstDescT{/*ID*/ It[0], /*CompositeOffset*/ It[1],
268  /*Size*/ It[2], BlobOffset});
269  BlobOffset += /*Size*/ It[2];
270  It += NumElements;
271  }
272  }
273  MSpecConstsBlob.resize(BlobOffset);
274 
275  bool HasDefaultValues = SCDefValRange.begin() != SCDefValRange.end();
276 
277  if (HasDefaultValues) {
278  pi::ByteArray DefValDescriptors =
279  pi::DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
280  std::uninitialized_copy(&DefValDescriptors[8],
281  &DefValDescriptors[8] + MSpecConstsBlob.size(),
282  MSpecConstsBlob.data());
283  }
284  }
285  }
286 
287  const RTDeviceBinaryImage *MBinImage = nullptr;
288  context MContext;
289  std::vector<device> MDevices;
290  bundle_state MState;
291  // Native program handler which this device image represents
292  RT::PiProgram MProgram = nullptr;
293  // List of kernel ids available in this image, elements should be sorted
294  // according to LessByNameComp
295  std::vector<kernel_id> MKernelIDs;
296 
297  // A mutex for sycnhronizing access to spec constants blob. Mutable because
298  // needs to be locked in the const method for getting spec constant value.
299  mutable std::mutex MSpecConstAccessMtx;
300  // Binary blob which can have values of all specialization constants in the
301  // image
302  std::vector<unsigned char> MSpecConstsBlob;
303  // Buffer containing binary blob which can have values of all specialization
304  // constants in the image, it is using for storing non-native specialization
305  // constants
306  RT::PiMem MSpecConstsBuffer = nullptr;
307  // Contains map of spec const names to their descriptions + offsets in
308  // the MSpecConstsBlob
309  std::map<std::string, std::vector<SpecConstDescT>> MSpecConstSymMap;
310 };
311 
312 } // namespace detail
313 } // namespace sycl
314 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::device_image_impl::device_image_impl
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::vector< kernel_id > KernelIDs, RT::PiProgram Program, const SpecConstMapT &SpecConstMap, const std::vector< unsigned char > &SpecConstsBlob)
Definition: device_image_impl.hpp:60
_pi_mem
PI Mem mapping to CUDA memory allocations, both data and texture/surface.
Definition: pi_cuda.hpp:208
cl::sycl::detail::device_image_impl::get_devices
const std::vector< device > & get_devices() const noexcept
Definition: device_image_impl.hpp:164
cl::sycl::detail::pi::DeviceBinaryProperty::asByteArray
ByteArray asByteArray() const
Definition: pi.cpp:640
pi.h
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:31
context_impl.hpp
cl::sycl::detail::RTDeviceBinaryImage
Definition: device_binary_image.hpp:20
cl::sycl::detail::device_image_impl::get_spec_const_buffer_ref
RT::PiMem & get_spec_const_buffer_ref() noexcept
Definition: device_image_impl.hpp:184
device.hpp
piextProgramGetNativeHandle
pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle)
Gets the native handle of a PI program object.
Definition: pi_esimd_emulator.cpp:1026
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::end
ConstIterator end() const
Definition: pi.hpp:278
cl::sycl::detail::device_image_impl::has_kernel
bool has_kernel(const kernel_id &KernelIDCand, const device &DeviceCand) const noexcept
Definition: device_image_impl.hpp:75
context.hpp
cl::sycl::detail::device_image_impl::set_specialization_constant_raw_value
void set_specialization_constant_raw_value(const char *SpecName, const void *Value) noexcept
Definition: device_image_impl.hpp:109
cl::sycl::detail::device_image_impl::device_image_impl
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector< device > Devices, bundle_state State, std::vector< kernel_id > KernelIDs, RT::PiProgram Program)
Definition: device_image_impl.hpp:51
cl::sycl::detail::device_image_impl::all_specialization_constant_native
bool all_specialization_constant_native() const noexcept
Definition: device_image_impl.hpp:96
kernel_id_impl.hpp
device_impl.hpp
cl::sycl::detail::device_image_impl::getNative
pi_native_handle getNative() const
Definition: device_image_impl.hpp:205
plugin.hpp
pi.hpp
cl::sycl::bundle_state
bundle_state
Definition: kernel_bundle_enums.hpp:14
cl::sycl::detail::device_image_impl::set_state
void set_state(bundle_state NewState) noexcept
Definition: device_image_impl.hpp:162
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::begin
ConstIterator begin() const
Definition: pi.hpp:277
piProgramRelease
pi_result piProgramRelease(pi_program program)
Definition: pi_esimd_emulator.cpp:1024
cl::sycl::detail::device_image_impl::has_kernel
bool has_kernel(const kernel_id &KernelIDCand) const noexcept
Definition: device_image_impl.hpp:70
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:35
kernel_bundle.hpp
cl::sycl::kernel_id
Objects of the class identify kernel is some kernel_bundle related APIs.
Definition: kernel_bundle.hpp:39
cl::sycl::detail::device_image_impl::get_kernel_ids_ref
std::vector< kernel_id > & get_kernel_ids_ref() noexcept
Definition: device_image_impl.hpp:178
cl::sycl::detail::device_image_impl::get_context
const context & get_context() const noexcept
Definition: device_image_impl.hpp:176
cl::sycl::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:187
cl::sycl::detail::pi::DeviceBinaryProperty
Definition: pi.hpp:227
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
PI_MEM_FLAGS_HOST_PTR_USE
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE
Definition: pi.h:552
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange
Definition: pi.hpp:249
cl::sycl::detail::device_image_impl::compatible_with_device
bool compatible_with_device(const device &Dev) const
Definition: device_image_impl.hpp:166
cl::sycl::detail::device_image_impl::has_specialization_constants
bool has_specialization_constants() const noexcept
Definition: device_image_impl.hpp:88
cl::sycl::detail::device_image_impl
Definition: device_image_impl.hpp:37
cl::sycl::detail::device_image_impl::get_program_ref
const RT::PiProgram & get_program_ref() const noexcept
Definition: device_image_impl.hpp:172
cl::sycl::detail::device_image_impl::~device_image_impl
~device_image_impl()
Definition: device_image_impl.hpp:217
_pi_program
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:523
cl::sycl::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:89
program_manager.hpp
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:72
cl::sycl::detail::pi::ByteArray::size
std::size_t size() const
Definition: pi.hpp:217
PI_MEM_FLAGS_ACCESS_RW
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW
Definition: pi.h:549
cl::sycl::detail::device_image_impl::is_specialization_constant_set
bool is_specialization_constant_set(const char *SpecName) const noexcept
Definition: device_image_impl.hpp:147
cl::sycl::detail::device_image_impl::get_spec_const_data_lock
std::mutex & get_spec_const_data_lock() noexcept
Definition: device_image_impl.hpp:201
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:198
std
Definition: accessor.hpp:2397
piMemBufferCreate
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties=nullptr)
Definition: pi_esimd_emulator.cpp:758
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
cl::sycl::detail::pi::ByteArray
Definition: pi.hpp:211
cl::sycl::detail::device_image_impl::get_spec_const_data_ref
const SpecConstMapT & get_spec_const_data_ref() const noexcept
Definition: device_image_impl.hpp:197
common.hpp
cl::sycl::info::context
context
Definition: info_desc.hpp:41
cl::sycl::detail::device_image_impl::get_spec_const_blob_ref
std::vector< unsigned char > & get_spec_const_blob_ref() noexcept
Definition: device_image_impl.hpp:180
cl::sycl::detail::pi::DeviceBinaryImage::PropertyRange::ConstIterator
Definition: pi.hpp:253
cl::sycl::detail::device_image_impl::get_kernel_ids
const std::vector< kernel_id > & get_kernel_ids() const noexcept
Definition: device_image_impl.hpp:84
cl::sycl::detail::device_image_impl::get_state
bundle_state get_state() const noexcept
Definition: device_image_impl.hpp:160
cl::sycl::detail::device_image_impl::get_bin_image_ref
const RTDeviceBinaryImage *& get_bin_image_ref() noexcept
Definition: device_image_impl.hpp:174
cl::sycl::detail::LessByNameComp
Definition: kernel_id_impl.hpp:16
cl::sycl::detail::device_image_impl::has_specialization_constant
bool has_specialization_constant(const char *SpecName) const noexcept
Definition: device_image_impl.hpp:101
cl::sycl::detail::device_image_impl::SpecConstDescT
Definition: device_image_impl.hpp:41
cl::sycl::detail::device_image_impl::get_specialization_constant_raw_value
void get_specialization_constant_raw_value(const char *SpecName, void *ValueRet) const noexcept
Definition: device_image_impl.hpp:129
cl::sycl::detail::device_image_impl::SpecConstMapT
std::map< std::string, std::vector< SpecConstDescT > > SpecConstMapT
Definition: device_image_impl.hpp:49
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12