DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_impl.hpp
Go to the documentation of this file.
1 //==------- kernel_impl.hpp --- SYCL kernel implementation -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
14 #include <detail/kernel_info.hpp>
15 #include <sycl/detail/common.hpp>
16 #include <sycl/detail/pi.h>
17 #include <sycl/detail/pi.hpp>
18 #include <sycl/device.hpp>
20 #include <sycl/info/info_desc.hpp>
21 
22 #include <cassert>
23 #include <memory>
24 
25 namespace sycl {
26 inline namespace _V1 {
27 namespace detail {
28 // Forward declaration
29 class program_impl;
30 class kernel_bundle_impl;
31 
32 using ContextImplPtr = std::shared_ptr<context_impl>;
33 using ProgramImplPtr = std::shared_ptr<program_impl>;
34 using KernelBundleImplPtr = std::shared_ptr<kernel_bundle_impl>;
35 class kernel_impl {
36 public:
47  KernelBundleImplPtr KernelBundleImpl,
48  const KernelArgMask *ArgMask = nullptr);
49 
64  ProgramImplPtr ProgramImpl, bool IsCreatedFromSource,
65  KernelBundleImplPtr KernelBundleImpl,
66  const KernelArgMask *ArgMask);
67 
75  DeviceImageImplPtr DeviceImageImpl,
76  KernelBundleImplPtr KernelBundleImpl,
77  const KernelArgMask *ArgMask, std::mutex *CacheMutex);
78 
83  kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl);
84 
85  // This section means the object is non-movable and non-copyable
86  // There is no need of move and copy constructors in kernel_impl.
87  // If they need to be added, piKernelRetain method for MKernel
88  // should be present.
89  kernel_impl(const kernel_impl &) = delete;
90  kernel_impl(kernel_impl &&) = delete;
91  kernel_impl &operator=(const kernel_impl &) = delete;
93 
94  ~kernel_impl();
95 
103  cl_kernel get() const {
104  if (is_host()) {
105  throw invalid_object_error(
106  "This instance of kernel doesn't support OpenCL interoperability.",
107  PI_ERROR_INVALID_KERNEL);
108  }
109  getPlugin()->call<PiApiKind::piKernelRetain>(MKernel);
110  return pi::cast<cl_kernel>(MKernel);
111  }
112 
116  bool is_host() const { return MContext->is_host(); }
117 
118  const PluginPtr &getPlugin() const { return MContext->getPlugin(); }
119 
124  template <typename Param> typename Param::return_type get_info() const;
125 
131  template <typename Param>
132  typename Param::return_type get_info(const device &Device) const;
133 
141  template <typename Param>
142  typename Param::return_type get_info(const device &Device,
143  const range<3> &WGSize) const;
144 
145  template <typename Param>
146  typename Param::return_type ext_oneapi_get_info(const queue &q) const;
147 
156  const sycl::detail::pi::PiKernel &getHandleRef() const { return MKernel; }
157 
162  bool isCreatedFromSource() const;
163 
164  const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; }
165 
167  const PluginPtr &Plugin = MContext->getPlugin();
168 
169  if (MContext->getBackend() == backend::opencl)
170  Plugin->call<PiApiKind::piKernelRetain>(MKernel);
171 
172  pi_native_handle NativeKernel = 0;
173  Plugin->call<PiApiKind::piextKernelGetNativeHandle>(MKernel, &NativeKernel);
174 
175  return NativeKernel;
176  }
177 
178  KernelBundleImplPtr get_kernel_bundle() const { return MKernelBundleImpl; }
179 
180  bool isInterop() const { return MIsInterop; }
181 
182  ProgramImplPtr getProgramImpl() const { return MProgramImpl; }
183  ContextImplPtr getContextImplPtr() const { return MContext; }
184 
186  return MNoncacheableEnqueueMutex;
187  }
188 
189  const KernelArgMask *getKernelArgMask() const { return MKernelArgMaskPtr; }
190  std::mutex *getCacheMutex() const { return MCacheMutex; }
191 
192 private:
194  const ContextImplPtr MContext;
195  const ProgramImplPtr MProgramImpl;
196  bool MCreatedFromSource = true;
197  const DeviceImageImplPtr MDeviceImageImpl;
198  const KernelBundleImplPtr MKernelBundleImpl;
199  bool MIsInterop = false;
200  std::mutex MNoncacheableEnqueueMutex;
201  const KernelArgMask *MKernelArgMaskPtr;
202  std::mutex *MCacheMutex = nullptr;
203 
204  bool isBuiltInKernel(const device &Device) const;
205  void checkIfValidForNumArgsInfoQuery() const;
206 };
207 
208 template <typename Param>
209 inline typename Param::return_type kernel_impl::get_info() const {
210  static_assert(is_kernel_info_desc<Param>::value,
211  "Invalid kernel information descriptor");
212  if (is_host()) {
213  // TODO implement
214  assert(0 && "Not implemented");
215  }
216 
217  if constexpr (std::is_same_v<Param, info::kernel::num_args>)
218  checkIfValidForNumArgsInfoQuery();
219 
220  return get_kernel_info<Param>(this->getHandleRef(), getPlugin());
221 }
222 
223 template <>
224 inline context kernel_impl::get_info<info::kernel::context>() const {
225  return createSyclObjFromImpl<context>(MContext);
226 }
227 
228 template <typename Param>
229 inline typename Param::return_type
230 kernel_impl::get_info(const device &Device) const {
231  if constexpr (std::is_same_v<
232  Param, info::kernel_device_specific::global_work_size>) {
233  bool isDeviceCustom = Device.get_info<info::device::device_type>() ==
235  if (!isDeviceCustom && !isBuiltInKernel(Device))
236  throw exception(
238  "info::kernel_device_specific::global_work_size descriptor may only "
239  "be used if the device type is device_type::custom or if the kernel "
240  "is a built-in kernel.");
241  }
242 
243  if (is_host()) {
244  return get_kernel_device_specific_info_host<Param>(Device);
245  }
246  return get_kernel_device_specific_info<Param>(
247  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
248  getPlugin());
249 }
250 
251 template <typename Param>
252 inline typename Param::return_type
254  const sycl::range<3> &WGSize) const {
255  if (is_host()) {
256  throw runtime_error("Sub-group feature is not supported on HOST device.",
257  PI_ERROR_INVALID_DEVICE);
258  }
259  return get_kernel_device_specific_info_with_input<Param>(
260  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), WGSize,
261  getPlugin());
262 }
263 
264 template <>
268  ext::oneapi::experimental::info::kernel_queue_specific::
269  max_num_work_group_sync>(const queue &Queue) const {
270  const auto &Plugin = getPlugin();
271  const auto &Handle = getHandleRef();
272  const auto MaxWorkGroupSize =
273  Queue.get_device().get_info<info::device::max_work_group_size>();
274  pi_uint32 GroupCount = 0;
276  Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount);
277  return GroupCount;
278 }
279 
280 } // namespace detail
281 } // namespace _V1
282 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask=nullptr)
Constructs a SYCL kernel instance from a PiKernel.
Definition: kernel_impl.cpp:20
kernel_impl(const kernel_impl &)=delete
const KernelArgMask * getKernelArgMask() const
kernel_impl & operator=(const kernel_impl &)=delete
sycl::detail::pi::PiKernel & getHandleRef()
Get a reference to a raw kernel object.
kernel_impl & operator=(kernel_impl &&)=delete
Param::return_type get_info(const device &Device, const range< 3 > &WGSize) const
Query device-specific information from a kernel using the info::kernel_device_specific descriptor for...
std::mutex & getNoncacheableEnqueueMutex()
bool is_host() const
Check if the associated SYCL context is a SYCL host context.
kernel_impl(kernel_impl &&)=delete
ProgramImplPtr getProgramImpl() const
ContextImplPtr getContextImplPtr() const
bool isCreatedFromSource() const
Check if kernel was created from a program that had been created from source.
Definition: kernel_impl.cpp:83
const PluginPtr & getPlugin() const
pi_native_handle getNative() const
const sycl::detail::pi::PiKernel & getHandleRef() const
Get a constant reference to a raw kernel object.
Param::return_type get_info() const
Query information from the kernel object using the info::kernel_info descriptor.
const DeviceImageImplPtr & getDeviceImage() const
KernelBundleImplPtr get_kernel_bundle() const
cl_kernel get() const
Gets a valid OpenCL kernel handle.
Param::return_type ext_oneapi_get_info(const queue &q) const
std::mutex * getCacheMutex() const
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:65
detail::ABINeutralT_t< typename detail::is_device_info_desc< Param >::return_type > get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.cpp:140
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:119
std::shared_ptr< device_image_impl > DeviceImageImplPtr
std::vector< bool > KernelArgMask
std::shared_ptr< program_impl > ProgramImplPtr
Definition: kernel_impl.hpp:33
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
sycl::detail::kernel_bundle_impl kernel_bundle_impl
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:107
Definition: access.hpp:18
uintptr_t pi_native_handle
Definition: pi.h:206
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_cuda.cpp:526
pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size, pi_uint32 *group_count_ret)
Gets the max work group count for a cooperative kernel.
Definition: pi_cuda.cpp:572
uint32_t pi_uint32
Definition: pi.h:202
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_cuda.cpp:567
C++ wrapper of extern "C" PI interfaces.