20 inline namespace _V1 {
21 namespace detail::enqueue_kernel_launch {
29 bool IsOpenCL =
false;
30 bool IsOpenCLV1x =
false;
31 bool IsOpenCLVGE20 =
false;
32 bool IsLevelZero =
false;
35 if (Backend == sycl::backend::opencl) {
37 DeviceImpl.
get_info<info::device::version>().substr(7, 3);
39 IsOpenCLV1x = (VersionString.find(
"1.") == 0);
41 (VersionString.find(
"2.") == 0) || (VersionString.find(
"3.") == 0);
42 }
else if (Backend == sycl::backend::ext_oneapi_level_zero) {
44 }
else if (Backend == sycl::backend::ext_oneapi_cuda) {
51 size_t CompileWGSize[3] = {0};
54 sizeof(size_t) * 3, CompileWGSize,
nullptr);
59 sizeof(size_t), &MaxWGSize,
nullptr);
61 const bool HasLocalSize = (NDRDesc.
LocalSize[0] != 0);
63 if (CompileWGSize[0] != 0) {
64 if (CompileWGSize[0] > MaxWGSize || CompileWGSize[1] > MaxWGSize ||
65 CompileWGSize[2] > MaxWGSize)
68 "Submitting a kernel decorated with reqd_work_group_size attribute "
69 "to a device that does not support this work group size is invalid.");
75 if (!HasLocalSize && (IsOpenCLV1x || IsOpenCLVGE20)) {
76 throw sycl::nd_range_error(
77 "OpenCL 1.x and 2.0 requires to pass local size argument even if "
78 "required work-group size was specified in the program source",
79 PI_ERROR_INVALID_WORK_GROUP_SIZE);
83 if (NDRDesc.
LocalSize[0] != CompileWGSize[0] ||
84 NDRDesc.
LocalSize[1] != CompileWGSize[1] ||
86 throw sycl::nd_range_error(
87 "The specified local size {" + std::to_string(NDRDesc.
LocalSize[2]) +
88 ", " + std::to_string(NDRDesc.
LocalSize[1]) +
", " +
90 "} doesn't match the required work-group size specified "
91 "in the program source {" +
92 std::to_string(CompileWGSize[2]) +
", " +
93 std::to_string(CompileWGSize[1]) +
", " +
94 std::to_string(CompileWGSize[0]) +
"}",
95 PI_ERROR_INVALID_WORK_GROUP_SIZE);
99 size_t MaxThreadsPerBlock[3] = {};
102 MaxThreadsPerBlock,
nullptr);
104 for (
size_t I = 0; I < 3; ++I) {
105 if (MaxThreadsPerBlock[I] < NDRDesc.
LocalSize[I]) {
106 throw sycl::nd_range_error(
107 "The number of work-items in each dimension of a work-group cannot "
109 std::to_string(MaxThreadsPerBlock[0]) +
", " +
110 std::to_string(MaxThreadsPerBlock[1]) +
", " +
111 std::to_string(MaxThreadsPerBlock[2]) +
"} for this device",
112 PI_ERROR_INVALID_WORK_GROUP_SIZE);
124 const size_t TotalNumberOfWIs =
126 if (TotalNumberOfWIs > MaxWGSize)
127 throw sycl::nd_range_error(
128 "Total number of work-items in a work-group cannot exceed " +
129 std::to_string(MaxWGSize),
130 PI_ERROR_INVALID_WORK_GROUP_SIZE);
131 }
else if (IsOpenCLVGE20 || IsLevelZero) {
138 size_t KernelWGSize = 0;
141 &KernelWGSize,
nullptr);
142 const size_t TotalNumberOfWIs =
144 if (TotalNumberOfWIs > KernelWGSize)
145 throw sycl::nd_range_error(
146 "Total number of work-items in a work-group cannot exceed " +
147 std::to_string(KernelWGSize) +
" for this kernel",
148 PI_ERROR_INVALID_WORK_GROUP_SIZE);
155 const bool NonUniformWGs =
165 const bool LocalExceedsGlobal =
176 if (LocalExceedsGlobal)
177 throw sycl::nd_range_error(
"Local workgroup size cannot be greater "
178 "than global range in any dimension",
179 PI_ERROR_INVALID_WORK_GROUP_SIZE);
181 throw sycl::nd_range_error(
182 "Global_work_size must be evenly divisible by local_work_size. "
183 "Non-uniform work-groups are not supported by the target "
185 PI_ERROR_INVALID_WORK_GROUP_SIZE);
204 &Opts.front(),
nullptr);
205 const bool HasStd20 = Opts.find(
"-cl-std=CL2.0") != std::string::npos;
206 const bool RequiresUniformWGSize =
207 Opts.find(
"-cl-uniform-work-group-size") != std::string::npos;
211 ", " + std::to_string(NDRDesc.
LocalSize[2]);
213 std::to_string(NDRDesc.
GlobalSize[0]) +
", " +
214 std::to_string(NDRDesc.
GlobalSize[1]) +
", " +
218 ?
"Local work-group size {" + LocalWGSize +
219 "} is greater than global range size {" + GlobalWGSize +
221 :
"Global work size {" + GlobalWGSize +
222 "} is not evenly divisible by local work-group size {" +
225 throw sycl::nd_range_error(
227 "Non-uniform work-groups are not allowed by "
228 "default. Underlying "
229 "OpenCL 2.x implementation supports this feature "
231 "it, build device program with -cl-std=CL2.0"),
232 PI_ERROR_INVALID_WORK_GROUP_SIZE);
233 else if (RequiresUniformWGSize)
234 throw sycl::nd_range_error(
236 "Non-uniform work-groups are not allowed by when "
237 "-cl-uniform-work-group-size flag is used. Underlying "
238 "OpenCL 2.x implementation supports this feature, but it "
241 "disabled by -cl-uniform-work-group-size build flag"),
242 PI_ERROR_INVALID_WORK_GROUP_SIZE);
255 const size_t TotalNumberOfWIs =
258 uint32_t NumRegisters = 0;
261 &NumRegisters,
nullptr);
263 uint32_t MaxRegistersPerBlock =
264 DeviceImpl.
get_info<ext::codeplay::experimental::info::device::
265 max_registers_per_work_group>();
267 const bool HasExceededAvailableRegisters =
268 TotalNumberOfWIs * NumRegisters > MaxRegistersPerBlock;
270 if (HasExceededAvailableRegisters) {
272 "Exceeded the number of registers available on the hardware.\n");
273 throw sycl::nd_range_error(
276 "\tThe number registers per work-group cannot exceed " +
277 std::to_string(MaxRegistersPerBlock) +
278 " for this kernel on this device.\n"
279 "\tThe kernel uses " +
280 std::to_string(NumRegisters) +
281 " registers per work-item for a total of " +
282 std::to_string(TotalNumberOfWIs) +
283 " work-items per work-group.\n"),
284 PI_ERROR_INVALID_WORK_GROUP_SIZE);
289 throw sycl::nd_range_error(
290 "Non-uniform work-groups are not supported by the target device",
291 PI_ERROR_INVALID_WORK_GROUP_SIZE);
299 constexpr
pi_result Error = PI_ERROR_INVALID_WORK_GROUP_SIZE;
301 "PI backend failed. PI backend returns: " +
codeToString(Error), Error);
310 size_t MaxWISize[] = {0, 0, 0};
315 for (
unsigned I = 0; I < NDRDesc.
Dims; I++) {
317 throw sycl::nd_range_error(
318 "Number of work-items in a work-group exceed limit for dimension " +
319 std::to_string(I) +
" : " + std::to_string(NDRDesc.
LocalSize[I]) +
320 " > " + std::to_string(MaxWISize[I]),
321 PI_ERROR_INVALID_WORK_ITEM_SIZE);
330 size_t MaxNWGs[] = {0, 0, 0};
334 for (
unsigned int I = 0; I < NDRDesc.
Dims; I++) {
336 if (NWgs > MaxNWGs[I])
337 throw sycl::nd_range_error(
338 "Number of work-groups exceed limit for dimension " +
339 std::to_string(I) +
" : " + std::to_string(NWgs) +
" > " +
340 std::to_string(MaxNWGs[I]),
341 PI_ERROR_INVALID_VALUE);
345 constexpr
pi_result Error = PI_ERROR_INVALID_VALUE;
347 "Native API failed. Native API returns: " +
codeToString(Error), Error);
352 assert(Error != PI_SUCCESS &&
353 "Success is expected to be handled on caller side");
355 case PI_ERROR_INVALID_WORK_GROUP_SIZE:
358 case PI_ERROR_INVALID_KERNEL_ARGS:
359 throw sycl::nd_range_error(
360 "The kernel argument values have not been specified "
362 "a kernel argument declared to be a pointer to a type.",
363 PI_ERROR_INVALID_KERNEL_ARGS);
365 case PI_ERROR_INVALID_WORK_ITEM_SIZE:
368 case PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED:
369 throw sycl::nd_range_error(
370 "image object is specified as an argument value"
371 " and the image format is not supported by device associated"
373 PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED);
375 case PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET:
376 throw sycl::nd_range_error(
377 "a sub-buffer object is specified as the value for an argument "
378 " that is a buffer object and the offset specified "
379 "when the sub-buffer object is created is not aligned "
380 "to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated"
382 PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET);
384 case PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE:
385 throw sycl::nd_range_error(
386 "failure to allocate memory for data store associated with image"
387 " or buffer objects specified as arguments to kernel",
388 PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE);
390 case PI_ERROR_INVALID_IMAGE_SIZE:
391 throw sycl::nd_range_error(
392 "image object is specified as an argument value and the image "
393 "dimensions (image width, height, specified or compute row and/or "
394 "slice pitch) are not supported by device associated with queue",
395 PI_ERROR_INVALID_IMAGE_SIZE);
397 case PI_ERROR_INVALID_VALUE:
400 case PI_ERROR_PLUGIN_SPECIFIC_ERROR:
406 return DeviceImpl.
getPlugin()->checkPiResult(Error);
412 "Native API failed. Native API returns: " +
codeToString(Error), Error);
418 namespace detail::kernel_get_group_info {
421 assert(Error != PI_SUCCESS &&
422 "Success is expected to be handled on caller side");
424 case PI_ERROR_INVALID_VALUE:
425 if (Descriptor == CL_KERNEL_GLOBAL_WORK_SIZE)
428 "info::kernel_device_specific::global_work_size descriptor may only "
429 "be used if the device type is device_type::custom or if the kernel "
430 "is a built-in kernel.");
434 Plugin->checkPiResult(Error);
sycl::range< 3 > GlobalSize
sycl::range< 3 > LocalSize
Param::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
platform get_platform() const
Get associated SYCL platform.
const PluginPtr & getPlugin() const
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
void handleInvalidValue(const device_impl &DeviceImpl, const NDRDescT &NDRDesc)
void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc)
void handleInvalidWorkItemSize(const device_impl &DeviceImpl, const NDRDescT &NDRDesc)
void handleErrorOrWarning(pi_result Error, const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc)
Analyzes error code and arguments of piEnqueueKernelLaunch to emit user-friendly exception describing...
void handleErrorOrWarning(pi_result Error, pi_kernel_group_info Descriptor, const PluginPtr &Plugin)
Analyzes error code of piKernelGetGroupInfo.
std::string codeToString(pi_int32 code)
std::shared_ptr< plugin > PluginPtr
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
pi_result piProgramGetBuildInfo(pi_program program, pi_device device, _pi_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
@ PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE
@ PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES
@ PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
@ PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE
@ PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE
@ PI_KERNEL_GROUP_INFO_NUM_REGS
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
@ PI_PROGRAM_BUILD_INFO_OPTIONS
C++ wrapper of extern "C" PI interfaces.