DPC++ Runtime
Runtime libraries for oneAPI DPC++
error_handling.cpp
//===------------------- error_handling.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// SYCL error handling of enqueue kernel operations
//
//===----------------------------------------------------------------------===//

#include "error_handling.hpp"

#include <detail/plugin.hpp>
#include <sycl/backend_types.hpp>
#include <sycl/detail/ur.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail::enqueue_kernel_launch {

void handleOutOfResources(const device_impl &DeviceImpl,
                          ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) {
  sycl::platform Platform = DeviceImpl.get_platform();
  sycl::backend Backend = Platform.get_backend();
  if (Backend == sycl::backend::ext_oneapi_cuda) {
    // CUDA:
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE is returned when the number of
    // registers required for the launch configuration exceeds the maximum
    // number of registers per block
    // (UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP). This is the case if
    // local_work_size[0] * ... * local_work_size[work_dim - 1] multiplied by
    // UR_KERNEL_INFO_NUM_REGS is greater than the value of
    // UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP. See Table 15: Technical
    // Specifications per Compute Capability, for limitations.
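    // For illustration (hypothetical figures): a 16x16x1 work-group has 256
    // work-items; if the kernel needs 300 registers per work-item, the launch
    // requires 256 * 300 = 76,800 registers, which exceeds a 65,536
    // registers-per-block limit and triggers this error.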
    const size_t TotalNumberOfWIs =
        NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];

    const PluginPtr &Plugin = DeviceImpl.getPlugin();
    uint32_t NumRegisters = 0;
    Plugin->call<UrApiKind::urKernelGetInfo>(Kernel, UR_KERNEL_INFO_NUM_REGS,
                                             sizeof(NumRegisters),
                                             &NumRegisters, nullptr);

    uint32_t MaxRegistersPerBlock =
        DeviceImpl.get_info<ext::codeplay::experimental::info::device::
                                max_registers_per_work_group>();

    const bool HasExceededAvailableRegisters =
        TotalNumberOfWIs * NumRegisters > MaxRegistersPerBlock;

    if (HasExceededAvailableRegisters) {
      std::string message(
          "Exceeded the number of registers available on the hardware.\n");
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          // Additional information which can be helpful to the user.
          message.append(
              "\tThe number of registers per work-group cannot exceed " +
              std::to_string(MaxRegistersPerBlock) +
              " for this kernel on this device.\n"
              "\tThe kernel uses " +
              std::to_string(NumRegisters) +
              " registers per work-item for a total of " +
              std::to_string(TotalNumberOfWIs) +
              " work-items per work-group.\n"));
    }
  }
  // Fallback
  constexpr ur_result_t Error = UR_RESULT_ERROR_OUT_OF_RESOURCES;
  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
                        "UR backend failed. UR backend returns:" +
                            codeToString(Error));
}

void handleInvalidWorkGroupSize(const device_impl &DeviceImpl,
                                ur_kernel_handle_t Kernel,
                                const NDRDescT &NDRDesc) {
  sycl::platform Platform = DeviceImpl.get_platform();

  // Some of the error handling below is special for particular OpenCL
  // versions. If this is an OpenCL backend, get the version.
  bool IsOpenCL = false;      // Backend is any OpenCL version
  bool IsOpenCLV1x = false;   // Backend is OpenCL 1.x
  bool IsOpenCLVGE20 = false; // Backend is OpenCL 2.0 or newer
  bool IsLevelZero = false;   // Backend is any oneAPI Level Zero version
  auto Backend = Platform.get_backend();
  if (Backend == sycl::backend::opencl) {
    std::string VersionString =
        DeviceImpl.get_info<info::device::version>().substr(7, 3);
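    // Per the OpenCL specification, the device version string has the form
    // "OpenCL <major>.<minor> <platform-specific information>", so the
    // substr(7, 3) above extracts the "<major>.<minor>" part, e.g. "3.0".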
    IsOpenCL = true;
    IsOpenCLV1x = (VersionString.find("1.") == 0);
    IsOpenCLVGE20 =
        (VersionString.find("2.") == 0) || (VersionString.find("3.") == 0);
  } else if (Backend == sycl::backend::ext_oneapi_level_zero) {
    IsLevelZero = true;
  }

  const PluginPtr &Plugin = DeviceImpl.getPlugin();
  ur_device_handle_t Device = DeviceImpl.getHandleRef();

  size_t CompileWGSize[3] = {0};
  Plugin->call<UrApiKind::urKernelGetGroupInfo>(
      Kernel, Device, UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE,
      sizeof(size_t) * 3, CompileWGSize, nullptr);

  size_t MaxWGSize = 0;
  Plugin->call<UrApiKind::urDeviceGetInfo>(Device,
                                           UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
                                           sizeof(size_t), &MaxWGSize, nullptr);

  const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);

  if (CompileWGSize[0] != 0) {
    if (CompileWGSize[0] > MaxWGSize || CompileWGSize[1] > MaxWGSize ||
        CompileWGSize[2] > MaxWGSize)
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::kernel_not_supported),
          "Submitting a kernel decorated with reqd_work_group_size attribute "
          "to a device that does not support this work group size is invalid.");

    // OpenCL 1.x && 2.0:
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and
    // the reqd_work_group_size attribute is used to declare the work-group
    // size for kernel in the program source.
    if (!HasLocalSize && (IsOpenCLV1x || IsOpenCLVGE20)) {
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "OpenCL 1.x and 2.0 require the local size argument to be passed "
          "even if a required work-group size was specified in the program "
          "source");
    }
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified
    // and does not match the required work-group size for kernel in the
    // program source.
    if (NDRDesc.LocalSize[0] != CompileWGSize[0] ||
        NDRDesc.LocalSize[1] != CompileWGSize[1] ||
        NDRDesc.LocalSize[2] != CompileWGSize[2])
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "The specified local size {" + std::to_string(NDRDesc.LocalSize[2]) +
              ", " + std::to_string(NDRDesc.LocalSize[1]) + ", " +
              std::to_string(NDRDesc.LocalSize[0]) +
              "} doesn't match the required work-group size specified "
              "in the program source {" +
              std::to_string(CompileWGSize[2]) + ", " +
              std::to_string(CompileWGSize[1]) + ", " +
              std::to_string(CompileWGSize[0]) + "}");
  }

  if (HasLocalSize) {
    size_t MaxThreadsPerBlock[3] = {};
    Plugin->call<UrApiKind::urDeviceGetInfo>(
        Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock),
        MaxThreadsPerBlock, nullptr);

    for (size_t I = 0; I < 3; ++I) {
      if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
        throw sycl::exception(
            sycl::make_error_code(sycl::errc::nd_range),
            "The number of work-items in each dimension of a "
            "work-group cannot exceed {" +
                std::to_string(MaxThreadsPerBlock[0]) + ", " +
                std::to_string(MaxThreadsPerBlock[1]) + ", " +
                std::to_string(MaxThreadsPerBlock[2]) +
                "} for this device");
      }
    }
  }

  if (IsOpenCLV1x) {
    // OpenCL 1.x:
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified
    // and the total number of work-items in the work-group computed as
    // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
    // than the value specified by UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE in
    // table 4.3
    const size_t TotalNumberOfWIs =
        NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
    if (TotalNumberOfWIs > MaxWGSize)
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "Total number of work-items in a work-group cannot exceed " +
              std::to_string(MaxWGSize));
  } else if (IsOpenCLVGE20 || IsLevelZero) {
    // OpenCL 2.x or oneAPI Level Zero:
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified
    // and the total number of work-items in the work-group computed as
    // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater
    // than the value specified by UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in
    // table 5.21.
    size_t KernelWGSize = 0;
    Plugin->call<UrApiKind::urKernelGetGroupInfo>(
        Kernel, Device, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t),
        &KernelWGSize, nullptr);
    const size_t TotalNumberOfWIs =
        NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2];
    if (TotalNumberOfWIs > KernelWGSize)
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "Total number of work-items in a work-group cannot exceed " +
              std::to_string(KernelWGSize) + " for this kernel");
  } else {
    // TODO: Should probably have something similar for the other backends
  }

  if (HasLocalSize) {
    // Is the global range size evenly divisible by the local work-group size?
    const bool NonUniformWGs =
        (NDRDesc.LocalSize[0] != 0 &&
         NDRDesc.GlobalSize[0] % NDRDesc.LocalSize[0] != 0) ||
        (NDRDesc.LocalSize[1] != 0 &&
         NDRDesc.GlobalSize[1] % NDRDesc.LocalSize[1] != 0) ||
        (NDRDesc.LocalSize[2] != 0 &&
         NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0);
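    // For example, a global range of 100 with a local size of 32 in the same
    // dimension leaves a remainder of 4 (100 % 32 == 4), so the work-groups
    // would be non-uniform.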
    // Is the local size of the work-group greater than the global range size
    // in any dimension?
    if (IsOpenCL) {
      const bool LocalExceedsGlobal =
          NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] ||
                            NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] ||
                            NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]);

      if (NonUniformWGs) {
        if (IsOpenCLV1x) {
          // OpenCL 1.x:
          // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is
          // specified and the number of work-items specified by
          // global_work_size is not evenly divisible by the size of the
          // work-group given by local_work_size
          if (LocalExceedsGlobal)
            throw sycl::exception(sycl::make_error_code(sycl::errc::nd_range),
                                  "Local work-group size cannot be greater "
                                  "than global range in any dimension");
          else
            throw sycl::exception(sycl::make_error_code(sycl::errc::nd_range),
                                  "Global_work_size must be evenly divisible "
                                  "by local_work_size. Non-uniform work-groups "
                                  "are not supported by the target device");
        } else {
          // OpenCL 2.x:
          // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if the program was
          // compiled with -cl-uniform-work-group-size and the number of
          // work-items specified by global_work_size is not evenly divisible
          // by the size of the work-group given by local_work_size

          ur_program_handle_t Program = nullptr;
          Plugin->call<UrApiKind::urKernelGetInfo>(
              Kernel, UR_KERNEL_INFO_PROGRAM, sizeof(ur_program_handle_t),
              &Program, nullptr);
          size_t OptsSize = 0;
          Plugin->call<UrApiKind::urProgramGetBuildInfo>(
              Program, Device, UR_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr,
              &OptsSize);
          std::string Opts(OptsSize, '\0');
          Plugin->call<UrApiKind::urProgramGetBuildInfo>(
              Program, Device, UR_PROGRAM_BUILD_INFO_OPTIONS, OptsSize,
              &Opts.front(), nullptr);
          const bool HasStd20 = Opts.find("-cl-std=CL2.0") != std::string::npos;
          const bool RequiresUniformWGSize =
              Opts.find("-cl-uniform-work-group-size") != std::string::npos;
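          // For instance, a build-options string containing
          // "-cl-std=CL2.0 -cl-uniform-work-group-size" would set both of the
          // flags above to true.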
          std::string LocalWGSize = std::to_string(NDRDesc.LocalSize[0]) +
                                    ", " +
                                    std::to_string(NDRDesc.LocalSize[1]) +
                                    ", " + std::to_string(NDRDesc.LocalSize[2]);
          std::string GlobalWGSize =
              std::to_string(NDRDesc.GlobalSize[0]) + ", " +
              std::to_string(NDRDesc.GlobalSize[1]) + ", " +
              std::to_string(NDRDesc.GlobalSize[2]);
          std::string message =
              LocalExceedsGlobal
                  ? "Local work-group size {" + LocalWGSize +
                        "} is greater than global range size {" + GlobalWGSize +
                        "}. "
                  : "Global work size {" + GlobalWGSize +
                        "} is not evenly divisible by local work-group size {" +
                        LocalWGSize + "}. ";
          if (!HasStd20)
            throw sycl::exception(
                sycl::make_error_code(sycl::errc::nd_range),
                message.append("Non-uniform work-groups are not allowed by "
                               "default. The underlying OpenCL 2.x "
                               "implementation supports this feature; to "
                               "enable it, build the device program with "
                               "-cl-std=CL2.0"));
          else if (RequiresUniformWGSize)
            throw sycl::exception(
                sycl::make_error_code(sycl::errc::nd_range),
                message.append("Non-uniform work-groups are not allowed when "
                               "the -cl-uniform-work-group-size flag is used. "
                               "The underlying OpenCL 2.x implementation "
                               "supports this feature, but it is disabled by "
                               "the -cl-uniform-work-group-size build flag"));
          // else unknown. fallback (below)
        }
      }
    } else {
      // TODO: Decide what checks (if any) we need for the other backends
    }
    throw sycl::exception(
        sycl::make_error_code(sycl::errc::nd_range),
        "Non-uniform work-groups are not supported by the target device");
  }
  // TODO: required number of sub-groups, OpenCL 2.1:
  // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and
  // is not consistent with the required number of sub-groups for kernel in the
  // program source.

  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
                        "internal error: expected HasLocalSize");
}

void handleInvalidWorkItemSize(const device_impl &DeviceImpl,
                               const NDRDescT &NDRDesc) {

  const PluginPtr &Plugin = DeviceImpl.getPlugin();
  ur_device_handle_t Device = DeviceImpl.getHandleRef();

  size_t MaxWISize[] = {0, 0, 0};

  Plugin->call<UrApiKind::urDeviceGetInfo>(
      Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxWISize), &MaxWISize,
      nullptr);
  for (unsigned I = 0; I < NDRDesc.Dims; I++) {
    if (NDRDesc.LocalSize[I] > MaxWISize[I])
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "Number of work-items in a work-group exceeds the limit for "
          "dimension " +
              std::to_string(I) + " : " + std::to_string(NDRDesc.LocalSize[I]) +
              " > " + std::to_string(MaxWISize[I]));
  }
}

void handleInvalidValue(const device_impl &DeviceImpl,
                        const NDRDescT &NDRDesc) {
  const PluginPtr &Plugin = DeviceImpl.getPlugin();
  ur_device_handle_t Device = DeviceImpl.getHandleRef();

  size_t MaxNWGs[] = {0, 0, 0};
  Plugin->call<UrApiKind::urDeviceGetInfo>(Device,
                                           UR_DEVICE_INFO_MAX_WORK_GROUPS_3D,
                                           sizeof(MaxNWGs), &MaxNWGs, nullptr);
  for (unsigned int I = 0; I < NDRDesc.Dims; I++) {
    size_t NWgs = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
    if (NWgs > MaxNWGs[I])
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::nd_range),
          "Number of work-groups exceeds the limit for dimension " +
              std::to_string(I) + " : " + std::to_string(NWgs) + " > " +
              std::to_string(MaxNWGs[I]));
  }

  // Fallback
  throw exception(make_error_code(errc::nd_range), "unknown internal error");
}

void handleErrorOrWarning(ur_result_t Error, const device_impl &DeviceImpl,
                          ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) {
  assert(Error != UR_RESULT_SUCCESS &&
         "Success is expected to be handled on caller side");
  switch (Error) {
  case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY:
  case UR_RESULT_ERROR_OUT_OF_RESOURCES:
    return handleOutOfResources(DeviceImpl, Kernel, NDRDesc);

  case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE:
    return handleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc);

  case UR_RESULT_ERROR_INVALID_KERNEL_ARGS:
    throw detail::set_ur_error(
        sycl::exception(
            sycl::make_error_code(sycl::errc::kernel_argument),
            "The kernel argument values have not been specified OR a kernel "
            "argument declared to be a pointer to a type."),
        UR_RESULT_ERROR_INVALID_KERNEL_ARGS);

  case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE:
    return handleInvalidWorkItemSize(DeviceImpl, NDRDesc);

  case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT:
    throw detail::set_ur_error(
        sycl::exception(
            sycl::make_error_code(sycl::errc::feature_not_supported),
            "image object is specified as an argument value and the image "
            "format is not supported by device associated with queue"),
        UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT);

  case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET:
    throw detail::set_ur_error(
        sycl::exception(
            sycl::make_error_code(sycl::errc::invalid),
            "a sub-buffer object is specified as the value for an "
            "argument that is a buffer object and the offset "
            "specified when the sub-buffer object is created is "
            "not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value "
            "for device associated with queue"),
        UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET);

  case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE:
    throw detail::set_ur_error(
        sycl::exception(
            sycl::make_error_code(sycl::errc::memory_allocation),
            "failure to allocate memory for data store associated with image "
            "or buffer objects specified as arguments to kernel"),
        UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE);

  case UR_RESULT_ERROR_INVALID_IMAGE_SIZE:
    throw detail::set_ur_error(
        sycl::exception(
            sycl::make_error_code(sycl::errc::invalid),
            "image object is specified as an argument value and the image "
            "dimensions (image width, height, specified or compute row and/or "
            "slice pitch) are not supported by device associated with queue"),
        UR_RESULT_ERROR_INVALID_IMAGE_SIZE);

  case UR_RESULT_ERROR_INVALID_VALUE:
    return handleInvalidValue(DeviceImpl, NDRDesc);

  case UR_RESULT_ERROR_ADAPTER_SPECIFIC:
    // checkUrResult does all the necessary handling for
    // UR_RESULT_ERROR_ADAPTER_SPECIFIC, making sure an error is thrown or
    // not, depending on whether UR_RESULT_ERROR_ADAPTER_SPECIFIC contains
    // an error or a warning. It also ensures that the contents of the error
    // message buffer (used only by UR_RESULT_ERROR_ADAPTER_SPECIFIC) get
    // handled correctly.
    return DeviceImpl.getPlugin()->checkUrResult(Error);

  // TODO: Handle other error codes

  default:
    throw detail::set_ur_error(
        exception(make_error_code(errc::runtime), "UR error"), Error);
  }
}

} // namespace detail::enqueue_kernel_launch

namespace detail::kernel_get_group_info {
void handleErrorOrWarning(ur_result_t Error, ur_kernel_group_info_t Descriptor,
                          const PluginPtr &Plugin) {
  assert(Error != UR_RESULT_SUCCESS &&
         "Success is expected to be handled on caller side");
  switch (Error) {
  case UR_RESULT_ERROR_INVALID_VALUE:
    if (Descriptor == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE)
      throw sycl::exception(
          sycl::make_error_code(sycl::errc::invalid),
          "info::kernel_device_specific::global_work_size descriptor may only "
          "be used if the device type is device_type::custom or if the kernel "
          "is a built-in kernel.");
    break;
  // TODO: Handle other error codes
  default:
    Plugin->checkUrResult(Error);
    break;
  }
}
} // namespace detail::kernel_get_group_info

} // namespace _V1
} // namespace sycl
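
For context, the sketch below (not part of this file) shows how the diagnostics produced by these handlers typically surface to application code: an invalid launch configuration is reported as a sycl::exception whose error code can be compared against sycl::errc::nd_range. The local size of 8192 is a hypothetical value assumed to exceed the device's maximum work-group size; exact limits and message text vary by device and backend.

#include <sycl/sycl.hpp>

#include <iostream>

int main() {
  sycl::queue Q;
  // Deliberately oversized local range (assumed to exceed the device's
  // maximum work-group size) so that the enqueue fails.
  sycl::nd_range<1> NDR{sycl::range<1>{8192}, sycl::range<1>{8192}};
  try {
    Q.parallel_for(NDR, [=](sycl::nd_item<1>) {}).wait();
  } catch (const sycl::exception &E) {
    // handleInvalidWorkGroupSize and the related handlers above translate
    // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE and similar codes into
    // errc::nd_range with a descriptive message.
    if (E.code() == sycl::make_error_code(sycl::errc::nd_range))
      std::cerr << E.what() << std::endl;
  }
  return 0;
}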