DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_impl.hpp
Go to the documentation of this file.
1 //==------- kernel_impl.hpp --- SYCL kernel implementation -----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/context_impl.hpp>
12 #include <detail/device_impl.hpp>
14 #include <detail/kernel_info.hpp>
15 #include <sycl/detail/common.hpp>
16 #include <sycl/detail/pi.h>
17 #include <sycl/detail/pi.hpp>
18 #include <sycl/device.hpp>
20 #include <sycl/info/info_desc.hpp>
21 
22 #include <cassert>
23 #include <memory>
24 
25 namespace sycl {
26 inline namespace _V1 {
27 namespace detail {
28 // Forward declaration
29 class kernel_bundle_impl;
30 
31 using ContextImplPtr = std::shared_ptr<context_impl>;
32 using KernelBundleImplPtr = std::shared_ptr<kernel_bundle_impl>;
34 class kernel_impl {
35 public:
45  KernelBundleImplPtr KernelBundleImpl,
46  const KernelArgMask *ArgMask = nullptr);
47 
55  DeviceImageImplPtr DeviceImageImpl,
56  KernelBundleImplPtr KernelBundleImpl,
57  const KernelArgMask *ArgMask, PiProgram ProgramPI,
58  std::mutex *CacheMutex);
59 
60  // This section means the object is non-movable and non-copyable
61  // There is no need of move and copy constructors in kernel_impl.
62  // If they need to be added, piKernelRetain method for MKernel
63  // should be present.
64  kernel_impl(const kernel_impl &) = delete;
65  kernel_impl(kernel_impl &&) = delete;
66  kernel_impl &operator=(const kernel_impl &) = delete;
68 
69  ~kernel_impl();
70 
78  cl_kernel get() const {
79  getPlugin()->call<PiApiKind::piKernelRetain>(MKernel);
80  return pi::cast<cl_kernel>(MKernel);
81  }
82 
83  const PluginPtr &getPlugin() const { return MContext->getPlugin(); }
84 
89  template <typename Param> typename Param::return_type get_info() const;
90 
94  template <typename Param>
95  typename Param::return_type get_backend_info() const;
96 
102  template <typename Param>
103  typename Param::return_type get_info(const device &Device) const;
104 
112  template <typename Param>
113  typename Param::return_type get_info(const device &Device,
114  const range<3> &WGSize) const;
115 
116  template <typename Param>
117  typename Param::return_type ext_oneapi_get_info(const queue &q) const;
118 
127  const sycl::detail::pi::PiKernel &getHandleRef() const { return MKernel; }
128 
133  bool isCreatedFromSource() const;
134 
135  const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; }
136 
138  const PluginPtr &Plugin = MContext->getPlugin();
139 
140  if (MContext->getBackend() == backend::opencl)
141  Plugin->call<PiApiKind::piKernelRetain>(MKernel);
142 
143  pi_native_handle NativeKernel = 0;
144  Plugin->call<PiApiKind::piextKernelGetNativeHandle>(MKernel, &NativeKernel);
145 
146  return NativeKernel;
147  }
148 
149  KernelBundleImplPtr get_kernel_bundle() const { return MKernelBundleImpl; }
150 
151  bool isInterop() const { return MIsInterop; }
152 
153  PiProgram getProgramRef() const { return MProgram; }
154  ContextImplPtr getContextImplPtr() const { return MContext; }
155 
157  return MNoncacheableEnqueueMutex;
158  }
159 
160  const KernelArgMask *getKernelArgMask() const { return MKernelArgMaskPtr; }
161  std::mutex *getCacheMutex() const { return MCacheMutex; }
162 
163 private:
165  const ContextImplPtr MContext;
166  const PiProgram MProgram = nullptr;
167  bool MCreatedFromSource = true;
168  const DeviceImageImplPtr MDeviceImageImpl;
169  const KernelBundleImplPtr MKernelBundleImpl;
170  bool MIsInterop = false;
171  std::mutex MNoncacheableEnqueueMutex;
172  const KernelArgMask *MKernelArgMaskPtr;
173  std::mutex *MCacheMutex = nullptr;
174 
175  bool isBuiltInKernel(const device &Device) const;
176  void checkIfValidForNumArgsInfoQuery() const;
177 };
178 
179 template <typename Param>
180 inline typename Param::return_type kernel_impl::get_info() const {
181  static_assert(is_kernel_info_desc<Param>::value,
182  "Invalid kernel information descriptor");
183  if constexpr (std::is_same_v<Param, info::kernel::num_args>)
184  checkIfValidForNumArgsInfoQuery();
185 
186  return get_kernel_info<Param>(this->getHandleRef(), getPlugin());
187 }
188 
189 template <>
190 inline context kernel_impl::get_info<info::kernel::context>() const {
191  return createSyclObjFromImpl<context>(MContext);
192 }
193 
194 template <typename Param>
195 inline typename Param::return_type
196 kernel_impl::get_info(const device &Device) const {
197  if constexpr (std::is_same_v<
198  Param, info::kernel_device_specific::global_work_size>) {
199  bool isDeviceCustom = Device.get_info<info::device::device_type>() ==
201  if (!isDeviceCustom && !isBuiltInKernel(Device))
202  throw exception(
204  "info::kernel_device_specific::global_work_size descriptor may only "
205  "be used if the device type is device_type::custom or if the kernel "
206  "is a built-in kernel.");
207  }
208 
209  return get_kernel_device_specific_info<Param>(
210  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
211  getPlugin());
212 }
213 
214 template <typename Param>
215 inline typename Param::return_type
217  const sycl::range<3> &WGSize) const {
218  return get_kernel_device_specific_info_with_input<Param>(
219  this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), WGSize,
220  getPlugin());
221 }
222 
223 template <>
227  ext::oneapi::experimental::info::kernel_queue_specific::
228  max_num_work_group_sync>(const queue &Queue) const {
229  const auto &Plugin = getPlugin();
230  const auto &Handle = getHandleRef();
231  const auto MaxWorkGroupSize =
232  Queue.get_device().get_info<info::device::max_work_group_size>();
233  pi_uint32 GroupCount = 0;
235  Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount);
236  return GroupCount;
237 }
238 
239 } // namespace detail
240 } // namespace _V1
241 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask=nullptr)
Constructs a SYCL kernel instance from a PiKernel.
Definition: kernel_impl.cpp:19
kernel_impl(const kernel_impl &)=delete
const KernelArgMask * getKernelArgMask() const
kernel_impl & operator=(const kernel_impl &)=delete
sycl::detail::pi::PiKernel & getHandleRef()
Get a reference to a raw kernel object.
kernel_impl & operator=(kernel_impl &&)=delete
Param::return_type get_info(const device &Device, const range< 3 > &WGSize) const
Query device-specific information from a kernel using the info::kernel_device_specific descriptor for...
std::mutex & getNoncacheableEnqueueMutex()
PiProgram getProgramRef() const
kernel_impl(kernel_impl &&)=delete
ContextImplPtr getContextImplPtr() const
bool isCreatedFromSource() const
Check if kernel was created from a program that had been created from source.
Definition: kernel_impl.cpp:67
const PluginPtr & getPlugin() const
Definition: kernel_impl.hpp:83
pi_native_handle getNative() const
const sycl::detail::pi::PiKernel & getHandleRef() const
Get a constant reference to a raw kernel object.
Param::return_type get_info() const
Query information from the kernel object using the info::kernel_info descriptor.
const DeviceImageImplPtr & getDeviceImage() const
Param::return_type get_backend_info() const
Queries the kernel object for SYCL backend-specific information.
KernelBundleImplPtr get_kernel_bundle() const
cl_kernel get() const
Gets a valid OpenCL kernel handle.
Definition: kernel_impl.hpp:78
Param::return_type ext_oneapi_get_info(const queue &q) const
std::mutex * getCacheMutex() const
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:215
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
::pi_program PiProgram
Definition: pi.hpp:111
std::shared_ptr< device_image_impl > DeviceImageImplPtr
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
std::vector< bool > KernelArgMask
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:47
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
sycl::detail::kernel_bundle_impl kernel_bundle_impl
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:64
Definition: access.hpp:18
uintptr_t pi_native_handle
Definition: pi.h:267
pi_result piKernelRetain(pi_kernel kernel)
Definition: pi_cuda.cpp:529
pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size, pi_uint32 *group_count_ret)
Gets the max work group count for a cooperative kernel.
Definition: pi_cuda.cpp:588
uint32_t pi_uint32
Definition: pi.h:263
pi_result piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle)
Gets the native handle of a PI kernel object.
Definition: pi_cuda.cpp:583
C++ wrapper of extern "C" PI interfaces.