DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_impl.hpp
Go to the documentation of this file.
1 //==------- kernel_impl.hpp --- SYCL kernel implementation -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
14 #include <detail/kernel_info.hpp>
15 #include <sycl/detail/common.hpp>
16 #include <sycl/detail/pi.h>
17 #include <sycl/detail/pi.hpp>
18 #include <sycl/device.hpp>
20 #include <sycl/info/info_desc.hpp>
21 
22 #include <cassert>
23 #include <memory>
24 
25 namespace sycl {
26 inline namespace _V1 {
27 namespace detail {
28 // Forward declaration
29 class program_impl;
30 class kernel_bundle_impl;
31 
32 using ContextImplPtr = std::shared_ptr<context_impl>;
33 using ProgramImplPtr = std::shared_ptr<program_impl>;
34 using KernelBundleImplPtr = std::shared_ptr<kernel_bundle_impl>;
36 class kernel_impl {
37 public:
48  KernelBundleImplPtr KernelBundleImpl,
49  const KernelArgMask *ArgMask = nullptr);
50 
65  ProgramImplPtr ProgramImpl, bool IsCreatedFromSource,
66  KernelBundleImplPtr KernelBundleImpl,
67  const KernelArgMask *ArgMask);
68 
76  DeviceImageImplPtr DeviceImageImpl,
77  KernelBundleImplPtr KernelBundleImpl,
78  const KernelArgMask *ArgMask, PiProgram ProgramPI,
79  std::mutex *CacheMutex);
80 
85  kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl);
86 
87  // This section means the object is non-movable and non-copyable
88  // There is no need of move and copy constructors in kernel_impl.
89  // If they need to be added, piKernelRetain method for MKernel
90  // should be present.
91  kernel_impl(const kernel_impl &) = delete;
92  kernel_impl(kernel_impl &&) = delete;
93  kernel_impl &operator=(const kernel_impl &) = delete;
95 
96  ~kernel_impl();
97 
105  cl_kernel get() const {
106  if (is_host()) {
107  throw invalid_object_error(
108  "This instance of kernel doesn't support OpenCL interoperability.",
109  PI_ERROR_INVALID_KERNEL);
110  }
111  getPlugin()->call<PiApiKind::piKernelRetain>(MKernel);
112  return pi::cast<cl_kernel>(MKernel);
113  }
114 
118  bool is_host() const { return MContext->is_host(); }
119 
120  const PluginPtr &getPlugin() const { return MContext->getPlugin(); }
121 
126  template <typename Param> typename Param::return_type get_info() const;
127 
131  template <typename Param>
132  typename Param::return_type get_backend_info() const;
133 
139  template <typename Param>
140  typename Param::return_type get_info(const device &Device) const;
141 
149  template <typename Param>
150  typename Param::return_type get_info(const device &Device,
151  const range<3> &WGSize) const;
152 
153  template <typename Param>
154  typename Param::return_type ext_oneapi_get_info(const queue &q) const;
155 
164  const sycl::detail::pi::PiKernel &getHandleRef() const { return MKernel; }
165 
170  bool isCreatedFromSource() const;
171 
172  const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; }
173 
175  const PluginPtr &Plugin = MContext->getPlugin();
176 
177  if (MContext->getBackend() == backend::opencl)
178  Plugin->call<PiApiKind::piKernelRetain>(MKernel);
179 
180  pi_native_handle NativeKernel = 0;
181  Plugin->call<PiApiKind::piextKernelGetNativeHandle>(MKernel, &NativeKernel);
182 
183  return NativeKernel;
184  }
185 
186  KernelBundleImplPtr get_kernel_bundle() const { return MKernelBundleImpl; }
187 
188  bool isInterop() const { return MIsInterop; }
189 
190  PiProgram getProgramRef() const { return MProgram; }
191  ContextImplPtr getContextImplPtr() const { return MContext; }
192 
194  return MNoncacheableEnqueueMutex;
195  }
196 
197  const KernelArgMask *getKernelArgMask() const { return MKernelArgMaskPtr; }
198  std::mutex *getCacheMutex() const { return MCacheMutex; }
199 
200 private:
202  const ContextImplPtr MContext;
203  const PiProgram MProgram = nullptr;
204  bool MCreatedFromSource = true;
205  const DeviceImageImplPtr MDeviceImageImpl;
206  const KernelBundleImplPtr MKernelBundleImpl;
207  bool MIsInterop = false;
208  std::mutex MNoncacheableEnqueueMutex;
209  const KernelArgMask *MKernelArgMaskPtr;
210  std::mutex *MCacheMutex = nullptr;
211 
212  bool isBuiltInKernel(const device &Device) const;
213  void checkIfValidForNumArgsInfoQuery() const;
214 };
215 
216 template <typename Param>
217 inline typename Param::return_type kernel_impl::get_info() const {
218  static_assert(is_kernel_info_desc<Param>::value,
219  "Invalid kernel information descriptor");
220  if (is_host()) {
221  // TODO implement
222  assert(0 && "Not implemented");
223  }
224 
225  if constexpr (std::is_same_v<Param, info::kernel::num_args>)
226  checkIfValidForNumArgsInfoQuery();
227 
228  return get_kernel_info<Param>(this->getHandleRef(), getPlugin());
229 }
230 
231 template <>
232 inline context kernel_impl::get_info<info::kernel::context>() const {
233  return createSyclObjFromImpl<context>(MContext);
234 }
235 
236 template <typename Param>
237 inline typename Param::return_type
238 kernel_impl::get_info(const device &Device) const {
239  if constexpr (std::is_same_v<
240  Param, info::kernel_device_specific::global_work_size>) {
241  bool isDeviceCustom = Device.get_info<info::device::device_type>() ==
243  if (!isDeviceCustom && !isBuiltInKernel(Device))
244  throw exception(
246  "info::kernel_device_specific::global_work_size descriptor may only "
247  "be used if the device type is device_type::custom or if the kernel "
248  "is a built-in kernel.");
249  }
250 
251  if (is_host()) {
252  return get_kernel_device_specific_info_host<Param>(Device);
253  }
254  return get_kernel_device_specific_info<Param>(
255  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
256  getPlugin());
257 }
258 
259 template <typename Param>
260 inline typename Param::return_type
262  const sycl::range<3> &WGSize) const {
263  if (is_host()) {
264  throw runtime_error("Sub-group feature is not supported on HOST device.",
265  PI_ERROR_INVALID_DEVICE);
266  }
267  return get_kernel_device_specific_info_with_input<Param>(
268  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), WGSize,
269  getPlugin());
270 }
271 
272 template <>
276  ext::oneapi::experimental::info::kernel_queue_specific::
277  max_num_work_group_sync>(const queue &Queue) const {
278  const auto &Plugin = getPlugin();
279  const auto &Handle = getHandleRef();
280  const auto MaxWorkGroupSize =
281  Queue.get_device().get_info<info::device::max_work_group_size>();
282  pi_uint32 GroupCount = 0;
284  Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount);
285  return GroupCount;
286 }
287 
288 } // namespace detail
289 } // namespace _V1
290 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask=nullptr)
Constructs a SYCL kernel instance from a PiKernel.
Definition: kernel_impl.cpp:20
kernel_impl(const kernel_impl &)=delete
const KernelArgMask * getKernelArgMask() const
kernel_impl & operator=(const kernel_impl &)=delete
sycl::detail::pi::PiKernel & getHandleRef()
Get a reference to a raw kernel object.
kernel_impl & operator=(kernel_impl &&)=delete
Param::return_type get_info(const device &Device, const range< 3 > &WGSize) const
Query device-specific information from a kernel using the info::kernel_device_specific descriptor for...
std::mutex & getNoncacheableEnqueueMutex()
PiProgram getProgramRef() const
bool is_host() const
Check if the associated SYCL context is a SYCL host context.
kernel_impl(kernel_impl &&)=delete
ContextImplPtr getContextImplPtr() const
bool isCreatedFromSource() const
Check if kernel was created from a program that had been created from source.
Definition: kernel_impl.cpp:84
const PluginPtr & getPlugin() const
pi_native_handle getNative() const
const sycl::detail::pi::PiKernel & getHandleRef() const
Get a constant reference to a raw kernel object.
Param::return_type get_info() const
Query information from the kernel object using the info::kernel_info descriptor.
const DeviceImageImplPtr & getDeviceImage() const
Param::return_type get_backend_info() const
Queries the kernel object for SYCL backend-specific information.
KernelBundleImplPtr get_kernel_bundle() const
cl_kernel get() const
Gets a valid OpenCL kernel handle.
Param::return_type ext_oneapi_get_info(const queue &q) const
std::mutex * getCacheMutex() const
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:223
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
::pi_program PiProgram
Definition: pi.hpp:120
std::shared_ptr< device_image_impl > DeviceImageImplPtr
std::vector< bool > KernelArgMask
std::shared_ptr< program_impl > ProgramImplPtr
Definition: kernel_impl.hpp:33
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:47
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
sycl::detail::kernel_bundle_impl kernel_bundle_impl
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:93
Definition: access.hpp:18
uintptr_t pi_native_handle
Definition: pi.h:228
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_cuda.cpp:525
pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size, pi_uint32 *group_count_ret)
Gets the max work group count for a cooperative kernel.
Definition: pi_cuda.cpp:571
uint32_t pi_uint32
Definition: pi.h:224
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_cuda.cpp:566
C++ wrapper of extern "C" PI interfaces.