DPC++ Runtime
Runtime libraries for oneAPI DPC++
enqueue_kernel.cpp
Go to the documentation of this file.
1 //===------------------- enqueue_kernel.cpp ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // SYCL error handling of enqueue kernel operations
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "error_handling.hpp"
14 
15 #include <detail/plugin.hpp>
16 #include <sycl/backend_types.hpp>
17 #include <sycl/detail/pi.hpp>
18 
19 namespace sycl {
21 namespace detail {
22 
23 namespace enqueue_kernel_launch {
24 
25 void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
26  const NDRDescT &NDRDesc) {
27  const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);
28 
29  const plugin &Plugin = DeviceImpl.getPlugin();
30  RT::PiDevice Device = DeviceImpl.getHandleRef();
31  sycl::platform Platform = DeviceImpl.get_platform();
32 
33  if (HasLocalSize) {
34  size_t MaxThreadsPerBlock[3] = {};
36  Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock),
37  MaxThreadsPerBlock, nullptr);
38 
39  for (size_t I = 0; I < 3; ++I) {
40  if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
41  throw sycl::nd_range_error(
42  "The number of work-items in each dimension of a work-group cannot "
43  "exceed {" +
44  std::to_string(MaxThreadsPerBlock[0]) + ", " +
45  std::to_string(MaxThreadsPerBlock[1]) + ", " +
46  std::to_string(MaxThreadsPerBlock[2]) + "} for this device",
47  PI_ERROR_INVALID_WORK_GROUP_SIZE);
48  }
49  }
50  }
51 
52  // Some of the error handling below is special for particular OpenCL
53  // versions. If this is an OpenCL backend, get the version.
54  bool IsOpenCL = false; // Backend is any OpenCL version
55  bool IsOpenCLV1x = false; // Backend is OpenCL 1.x
56  bool IsOpenCLV20 = false; // Backend is OpenCL 2.0
57  if (Platform.get_backend() == sycl::backend::opencl) {
58  std::string VersionString = DeviceImpl.get_info<info::device::version>();
59  IsOpenCL = true;
60  IsOpenCLV1x = (VersionString.find("1.") == 0);
61  IsOpenCLV20 = (VersionString.find("2.0") == 0);
62  }
63 
64  size_t CompileWGSize[3] = {0};
67  sizeof(size_t) * 3, CompileWGSize, nullptr);
68 
69  if (CompileWGSize[0] != 0) {
70  // OpenCL 1.x && 2.0:
71  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the
72  // reqd_work_group_size attribute is used to declare the work-group size
73  // for kernel in the program source.
74  if (!HasLocalSize && (IsOpenCLV1x || IsOpenCLV20)) {
75  throw sycl::nd_range_error(
76  "OpenCL 1.x and 2.0 requires to pass local size argument even if "
77  "required work-group size was specified in the program source",
78  PI_ERROR_INVALID_WORK_GROUP_SIZE);
79  }
80  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does
81  // not match the required work-group size for kernel in the program source.
82  if (NDRDesc.LocalSize[0] != CompileWGSize[0] ||
83  NDRDesc.LocalSize[1] != CompileWGSize[1] ||
84  NDRDesc.LocalSize[2] != CompileWGSize[2])
85  throw sycl::nd_range_error(
86  "The specified local size {" + std::to_string(NDRDesc.LocalSize[2]) +
87  ", " + std::to_string(NDRDesc.LocalSize[1]) + ", " +
88  std::to_string(NDRDesc.LocalSize[0]) +
89  "} doesn't match the required work-group size specified "
90  "in the program source {" +
91  std::to_string(CompileWGSize[2]) + ", " +
92  std::to_string(CompileWGSize[1]) + ", " +
93  std::to_string(CompileWGSize[0]) + "}",
94  PI_ERROR_INVALID_WORK_GROUP_SIZE);
95  }
96  if (IsOpenCL) {
97  if (IsOpenCLV1x) {
98  // OpenCL 1.x:
99  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
100  // the total number of work-items in the work-group computed as
101  // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
102  // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in
103  // table 4.3
104  size_t MaxWGSize = 0;
107  &MaxWGSize, nullptr);
108  const size_t TotalNumberOfWIs =
109  NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
110  if (TotalNumberOfWIs > MaxWGSize)
111  throw sycl::nd_range_error(
112  "Total number of work-items in a work-group cannot exceed " +
113  std::to_string(MaxWGSize),
114  PI_ERROR_INVALID_WORK_GROUP_SIZE);
115  } else {
116  // OpenCL 2.x:
117  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
118  // the total number of work-items in the work-group computed as
119  // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
120  // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
121  // table 5.21.
122  size_t KernelWGSize = 0;
124  Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t),
125  &KernelWGSize, nullptr);
126  const size_t TotalNumberOfWIs =
127  NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
128  if (TotalNumberOfWIs > KernelWGSize)
129  throw sycl::nd_range_error(
130  "Total number of work-items in a work-group cannot exceed " +
131  std::to_string(KernelWGSize) + " for this kernel",
132  PI_ERROR_INVALID_WORK_GROUP_SIZE);
133  }
134  } else {
135  // TODO: Should probably have something similar for the other backends
136  }
137 
138  if (HasLocalSize) {
139  // Is the global range size evenly divisible by the local workgroup size?
140  const bool NonUniformWGs =
141  (NDRDesc.LocalSize[0] != 0 &&
142  NDRDesc.GlobalSize[0] % NDRDesc.LocalSize[0] != 0) ||
143  (NDRDesc.LocalSize[1] != 0 &&
144  NDRDesc.GlobalSize[1] % NDRDesc.LocalSize[1] != 0) ||
145  (NDRDesc.LocalSize[2] != 0 &&
146  NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0);
147  // Is the local size of the workgroup greater than the global range size in
148  // any dimension?
149  if (IsOpenCL) {
150  const bool LocalExceedsGlobal =
151  NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] ||
152  NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] ||
153  NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]);
154 
155  if (NonUniformWGs) {
156  if (IsOpenCLV1x) {
157  // OpenCL 1.x:
158  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified
159  // and number of workitems specified by global_work_size is not evenly
160  // divisible by size of work-group given by local_work_size
161  if (LocalExceedsGlobal)
162  throw sycl::nd_range_error("Local workgroup size cannot be greater "
163  "than global range in any dimension",
164  PI_ERROR_INVALID_WORK_GROUP_SIZE);
165  else
166  throw sycl::nd_range_error(
167  "Global_work_size must be evenly divisible by local_work_size. "
168  "Non-uniform work-groups are not supported by the target "
169  "device",
170  PI_ERROR_INVALID_WORK_GROUP_SIZE);
171  } else {
172  // OpenCL 2.x:
173  // PI_ERROR_INVALID_WORK_GROUP_SIZE if the program was compiled with
174  // –cl-uniform-work-group-size and the number of work-items specified
175  // by global_work_size is not evenly divisible by size of work-group
176  // given by local_work_size
177 
178  pi_program Program = nullptr;
180  Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(pi_program), &Program,
181  nullptr);
182  size_t OptsSize = 0;
184  Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr,
185  &OptsSize);
186  std::string Opts(OptsSize, '\0');
188  Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize,
189  &Opts.front(), nullptr);
190  const bool HasStd20 = Opts.find("-cl-std=CL2.0") != std::string::npos;
191  const bool RequiresUniformWGSize =
192  Opts.find("-cl-uniform-work-group-size") != std::string::npos;
193  std::string LocalWGSize = std::to_string(NDRDesc.LocalSize[0]) +
194  ", " +
195  std::to_string(NDRDesc.LocalSize[1]) +
196  ", " + std::to_string(NDRDesc.LocalSize[2]);
197  std::string GlobalWGSize =
198  std::to_string(NDRDesc.GlobalSize[0]) + ", " +
199  std::to_string(NDRDesc.GlobalSize[1]) + ", " +
200  std::to_string(NDRDesc.GlobalSize[2]);
201  std::string message =
202  LocalExceedsGlobal
203  ? "Local work-group size {" + LocalWGSize +
204  "} is greater than global range size {" + GlobalWGSize +
205  "}. "
206  : "Global work size {" + GlobalWGSize +
207  "} is not evenly divisible by local work-group size {" +
208  LocalWGSize + "}. ";
209  if (!HasStd20)
210  throw sycl::nd_range_error(
211  message.append(
212  "Non-uniform work-groups are not allowed by "
213  "default. Underlying "
214  "OpenCL 2.x implementation supports this feature "
215  "and to enable "
216  "it, build device program with -cl-std=CL2.0"),
217  PI_ERROR_INVALID_WORK_GROUP_SIZE);
218  else if (RequiresUniformWGSize)
219  throw sycl::nd_range_error(
220  message.append(
221  "Non-uniform work-groups are not allowed by when "
222  "-cl-uniform-work-group-size flag is used. Underlying "
223  "OpenCL 2.x implementation supports this feature, but it "
224  "is "
225  "being "
226  "disabled by -cl-uniform-work-group-size build flag"),
227  PI_ERROR_INVALID_WORK_GROUP_SIZE);
228  // else unknown. fallback (below)
229  }
230  }
231  } else {
232  // TODO: Decide what checks (if any) we need for the other backends
233  }
234  throw sycl::nd_range_error(
235  "Non-uniform work-groups are not supported by the target device",
236  PI_ERROR_INVALID_WORK_GROUP_SIZE);
237  }
238  // TODO: required number of sub-groups, OpenCL 2.1:
239  // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not
240  // consistent with the required number of sub-groups for kernel in the
241  // program source.
242 
243  // Fallback
244  constexpr pi_result Error = PI_ERROR_INVALID_WORK_GROUP_SIZE;
245  throw runtime_error(
246  "PI backend failed. PI backend returns: " + codeToString(Error), Error);
247 }
248 
249 void handleInvalidWorkItemSize(const device_impl &DeviceImpl,
250  const NDRDescT &NDRDesc) {
251 
252  const plugin &Plugin = DeviceImpl.getPlugin();
253  RT::PiDevice Device = DeviceImpl.getHandleRef();
254 
255  size_t MaxWISize[] = {0, 0, 0};
256 
258  Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxWISize), &MaxWISize,
259  nullptr);
260  for (unsigned I = 0; I < NDRDesc.Dims; I++) {
261  if (NDRDesc.LocalSize[I] > MaxWISize[I])
262  throw sycl::nd_range_error(
263  "Number of work-items in a work-group exceed limit for dimension " +
264  std::to_string(I) + " : " + std::to_string(NDRDesc.LocalSize[I]) +
265  " > " + std::to_string(MaxWISize[I]),
266  PI_ERROR_INVALID_WORK_ITEM_SIZE);
267  }
268 }
269 
270 void handleInvalidValue(const device_impl &DeviceImpl,
271  const NDRDescT &NDRDesc) {
272  const plugin &Plugin = DeviceImpl.getPlugin();
273  RT::PiDevice Device = DeviceImpl.getHandleRef();
274 
275  size_t MaxNWGs[] = {0, 0, 0};
278  &MaxNWGs, nullptr);
279  for (unsigned int I = 0; I < NDRDesc.Dims; I++) {
280  size_t NWgs = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
281  if (NWgs > MaxNWGs[I])
282  throw sycl::nd_range_error(
283  "Number of work-groups exceed limit for dimension " +
284  std::to_string(I) + " : " + std::to_string(NWgs) + " > " +
285  std::to_string(MaxNWGs[I]),
286  PI_ERROR_INVALID_VALUE);
287  }
288 
289  // fallback
290  constexpr pi_result Error = PI_ERROR_INVALID_VALUE;
291  throw runtime_error(
292  "Native API failed. Native API returns: " + codeToString(Error), Error);
293 }
294 
295 void handleErrorOrWarning(pi_result Error, const device_impl &DeviceImpl,
296  pi_kernel Kernel, const NDRDescT &NDRDesc) {
297  assert(Error != PI_SUCCESS &&
298  "Success is expected to be handled on caller side");
299  switch (Error) {
300  case PI_ERROR_INVALID_WORK_GROUP_SIZE:
301  return handleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc);
302 
303  case PI_ERROR_INVALID_KERNEL_ARGS:
304  throw sycl::nd_range_error(
305  "The kernel argument values have not been specified "
306  " OR "
307  "a kernel argument declared to be a pointer to a type.",
308  PI_ERROR_INVALID_KERNEL_ARGS);
309 
310  case PI_ERROR_INVALID_WORK_ITEM_SIZE:
311  return handleInvalidWorkItemSize(DeviceImpl, NDRDesc);
312 
313  case PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED:
314  throw sycl::nd_range_error(
315  "image object is specified as an argument value"
316  " and the image format is not supported by device associated"
317  " with queue",
318  PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED);
319 
320  case PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET:
321  throw sycl::nd_range_error(
322  "a sub-buffer object is specified as the value for an argument "
323  " that is a buffer object and the offset specified "
324  "when the sub-buffer object is created is not aligned "
325  "to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated"
326  " with queue",
327  PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET);
328 
329  case PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE:
330  throw sycl::nd_range_error(
331  "failure to allocate memory for data store associated with image"
332  " or buffer objects specified as arguments to kernel",
333  PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE);
334 
335  case PI_ERROR_INVALID_IMAGE_SIZE:
336  throw sycl::nd_range_error(
337  "image object is specified as an argument value and the image "
338  "dimensions (image width, height, specified or compute row and/or "
339  "slice pitch) are not supported by device associated with queue",
340  PI_ERROR_INVALID_IMAGE_SIZE);
341 
342  case PI_ERROR_INVALID_VALUE:
343  return handleInvalidValue(DeviceImpl, NDRDesc);
344 
345  case PI_ERROR_PLUGIN_SPECIFIC_ERROR:
346  // checkPiResult does all the necessary handling for
347  // PI_ERROR_PLUGIN_SPECIFIC_ERROR, making sure an error is thrown or not,
348  // depending on whether PI_ERROR_PLUGIN_SPECIFIC_ERROR contains an error or
349  // a warning. It also ensures that the contents of the error message buffer
350  // (used only by PI_ERROR_PLUGIN_SPECIFIC_ERROR) get handled correctly.
351  return DeviceImpl.getPlugin().checkPiResult(Error);
352 
353  // TODO: Handle other error codes
354 
355  default:
356  throw runtime_error(
357  "Native API failed. Native API returns: " + codeToString(Error), Error);
358  }
359 }
360 
361 } // namespace enqueue_kernel_launch
362 
363 } // namespace detail
364 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
365 } // namespace sycl
sycl::range< 3 > GlobalSize
Definition: cg_types.hpp:116
sycl::range< 3 > LocalSize
Definition: cg_types.hpp:117
Param::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
platform get_platform() const
Get associated SYCL platform.
const plugin & getPlugin() const
RT::PiDevice & getHandleRef()
Get reference to PI device.
Definition: device_impl.hpp:64
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
void checkPiResult(RT::PiResult pi_result) const
Checks return value from PI calls.
Definition: plugin.hpp:116
#define __SYCL_INLINE_VER_NAMESPACE(X)
void handleInvalidValue(const device_impl &DeviceImpl, const NDRDescT &NDRDesc)
void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc)
void handleInvalidWorkItemSize(const device_impl &DeviceImpl, const NDRDescT &NDRDesc)
void handleErrorOrWarning(pi_result Error, const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc)
Analyzes error code and arguments of piEnqueueKernelLaunch to emit user-friendly exception describing...
::pi_device PiDevice
Definition: pi.hpp:110
static std::string codeToString(pi_int32 code)
Definition: common.hpp:132
----- Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
_pi_result
Definition: pi.h:116
@ PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE
Definition: pi.h:191
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
Definition: pi.h:190
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
Definition: pi.h:301
_pi_program * pi_program
Definition: pi.h:912
@ PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE
Definition: pi.h:348
@ PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE
Definition: pi.h:349
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
@ PI_KERNEL_INFO_PROGRAM
Definition: pi.h:342
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
@ PI_PROGRAM_BUILD_INFO_OPTIONS
Definition: pi.h:141
C++ wrapper of extern "C" PI interfaces.
@ Device
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:791
Implementation of PI Program on CUDA Module object.
Definition: pi_cuda.hpp:736