DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_info.hpp
1 //==-------- device_info.hpp - SYCL device info methods --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 #include <detail/device_impl.hpp>
11 #include <detail/platform_impl.hpp>
12 #include <detail/platform_util.hpp>
13 #include <detail/plugin.hpp>
14 #include <detail/program_manager/program_manager.hpp>
16 #include <sycl/detail/defines.hpp>
17 #include <sycl/detail/os_util.hpp>
18 #include <sycl/detail/pi.hpp>
19 #include <sycl/device.hpp>
20 #include <sycl/ext/oneapi/experimental/device_architecture.hpp>
21 #include <sycl/ext/oneapi/matrix/query-types.hpp>
22 #include <sycl/feature_test.hpp>
23 #include <sycl/info/info_desc.hpp>
24 #include <sycl/memory_enums.hpp>
25 #include <sycl/platform.hpp>
26 
27 #include <chrono>
28 #include <thread>
29 
30 namespace sycl {
31 inline namespace _V1 {
32 namespace detail {
33 
34 inline std::vector<info::fp_config> read_fp_bitfield(pi_device_fp_config bits) {
35  std::vector<info::fp_config> result;
36  if (bits & PI_FP_DENORM)
37  result.push_back(info::fp_config::denorm);
38  if (bits & PI_FP_INF_NAN)
39  result.push_back(info::fp_config::inf_nan);
40  if (bits & PI_FP_ROUND_TO_NEAREST)
41  result.push_back(info::fp_config::round_to_nearest);
42  if (bits & PI_FP_ROUND_TO_ZERO)
43  result.push_back(info::fp_config::round_to_zero);
44  if (bits & PI_FP_ROUND_TO_INF)
45  result.push_back(info::fp_config::round_to_inf);
46  if (bits & PI_FP_FMA)
47  result.push_back(info::fp_config::fma);
48  if (bits & PI_FP_SOFT_FLOAT)
49  result.push_back(info::fp_config::soft_float);
50  if (bits & PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
51  result.push_back(info::fp_config::correctly_rounded_divide_sqrt);
52  return result;
53 }
54 
55 inline std::vector<info::partition_affinity_domain>
56 read_domain_bitfield(pi_device_affinity_domain bits) {
57  std::vector<info::partition_affinity_domain> result;
58  if (bits & PI_DEVICE_AFFINITY_DOMAIN_NUMA)
59  result.push_back(info::partition_affinity_domain::numa);
60  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE)
61  result.push_back(info::partition_affinity_domain::L4_cache);
62  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE)
63  result.push_back(info::partition_affinity_domain::L3_cache);
64  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE)
65  result.push_back(info::partition_affinity_domain::L2_cache);
66  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE)
67  result.push_back(info::partition_affinity_domain::L1_cache);
68  if (bits & PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE)
69  result.push_back(info::partition_affinity_domain::next_partitionable);
70  return result;
71 }
72 
73 inline std::vector<info::execution_capability>
74 read_execution_bitfield(pi_device_exec_capabilities bits) {
75  std::vector<info::execution_capability> result;
76  if (bits & PI_EXEC_KERNEL)
77  result.push_back(info::execution_capability::exec_kernel);
78  if (bits & PI_EXEC_NATIVE_KERNEL)
79  result.push_back(info::execution_capability::exec_native_kernel);
80  return result;
81 }
82 
83 inline std::string
84 affinityDomainToString(info::partition_affinity_domain AffinityDomain) {
85  switch (AffinityDomain) {
86 #define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
87  case DOMAIN: \
88  return #DOMAIN;
89 
90  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
91  sycl::info::partition_affinity_domain::numa)
92  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
93  sycl::info::partition_affinity_domain::L4_cache)
94  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
95  sycl::info::partition_affinity_domain::L3_cache)
96  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
97  sycl::info::partition_affinity_domain::L2_cache)
98  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
99  sycl::info::partition_affinity_domain::L1_cache)
100  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
101  sycl::info::partition_affinity_domain::next_partitionable)
102 #undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
103  default:
104  assert(false && "Missing case for affinity domain.");
105  return "unknown";
106  }
107 }
108 
109 // Mapping expected SYCL return types to those returned by PI calls
110 template <typename T> struct sycl_to_pi {
111  using type = T;
112 };
113 template <> struct sycl_to_pi<bool> {
114  using type = pi_bool;
115 };
116 template <> struct sycl_to_pi<device> {
117  using type = sycl::detail::pi::PiDevice;
118 };
119 template <> struct sycl_to_pi<platform> {
120  using type = sycl::detail::pi::PiPlatform;
121 };
122 
123 // Mapping fp_config device info types to the values used to check fp support
124 template <typename Param> struct check_fp_support {};
125 
126 template <> struct check_fp_support<info::device::half_fp_config> {
127  using type = info::device::native_vector_width_half;
128 };
129 
130 template <> struct check_fp_support<info::device::double_fp_config> {
131  using type = info::device::native_vector_width_double;
132 };
133 
134 // Structs for emulating function template partial specialization
135 // Default template for the general case
136 // TODO: get rid of remaining uses of OpenCL directly
137 //
138 template <typename ReturnT, typename Param> struct get_device_info_impl {
139  static ReturnT get(const DeviceImplPtr &Dev) {
140  typename sycl_to_pi<ReturnT>::type result;
141  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
142  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
143  nullptr);
144  return ReturnT(result);
145  }
146 };
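// Editor's note (illustrative addition, not part of the original header): for a
// simple scalar descriptor the primary template above boils down to one
// piDeviceGetInfo call. A rough sketch, assuming info::device::vendor_id as the
// queried descriptor:
//
//   uint32_t result; // sycl_to_pi<uint32_t>::type is uint32_t (default mapping)
//   Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
//       Dev->getHandleRef(), PiInfoCode<info::device::vendor_id>::value,
//       sizeof(result), &result, nullptr);
//   return uint32_t(result);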
147 
148 // Specialization for platform
149 template <typename Param> struct get_device_info_impl<platform, Param> {
150  static platform get(const DeviceImplPtr &Dev) {
151  typename sycl_to_pi<platform>::type result;
152  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
153  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
154  nullptr);
155  // TODO: Change PiDevice to device_impl.
156  // Use the Plugin from the device_impl class after plugin details
157  // are added to the class.
158  return createSyclObjFromImpl<platform>(
159  platform_impl::getOrMakePlatformImpl(result, Dev->getPlugin()));
160  }
161 };
162 
163 // Helper function to allow using the specialization of get_device_info_impl
164 // for string return type in other specializations.
165 inline std::string device_impl::get_device_info_string(
166  sycl::detail::pi::PiDeviceInfo InfoCode) const {
167  size_t resultSize = 0;
168  getPlugin()->call<PiApiKind::piDeviceGetInfo>(getHandleRef(), InfoCode, 0,
169  nullptr, &resultSize);
170  if (resultSize == 0) {
171  return std::string();
172  }
173  std::unique_ptr<char[]> result(new char[resultSize]);
174  getPlugin()->call<PiApiKind::piDeviceGetInfo>(
175  getHandleRef(), InfoCode, resultSize, result.get(), nullptr);
176 
177  return std::string(result.get());
178 }
179 
180 // Specialization for string return type, variable return size
181 template <typename Param> struct get_device_info_impl<std::string, Param> {
182  static std::string get(const DeviceImplPtr &Dev) {
183  return Dev->get_device_info_string(PiInfoCode<Param>::value);
184  }
185 };
186 
187 // Specialization for parent device
188 template <typename ReturnT>
189 struct get_device_info_impl<ReturnT, info::device::parent_device> {
190  static ReturnT get(const DeviceImplPtr &Dev);
191 };
192 
193 // Specialization for fp_config types, checks the corresponding fp type support
194 template <typename Param>
195 struct get_device_info_impl<std::vector<info::fp_config>, Param> {
196  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
197  // Check if fp type is supported
198  if (!get_device_info_impl<
199  typename check_fp_support<Param>::type::return_type,
200  typename check_fp_support<Param>::type>::get(Dev)) {
201  return {};
202  }
203  cl_device_fp_config result;
204  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
205  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
206  nullptr);
207  return read_fp_bitfield(result);
208  }
209 };
210 
211 // Specialization for device version
212 template <> struct get_device_info_impl<std::string, info::device::version> {
213  static std::string get(const DeviceImplPtr &Dev) {
214  return Dev->get_device_info_string(
215  PiInfoCode<info::device::version>::value);
216  }
217 };
218 
219 // Specialization for single_fp_config, no type support check required
220 template <>
221 struct get_device_info_impl<std::vector<info::fp_config>,
222  info::device::single_fp_config> {
223  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
224  pi_device_fp_config result;
225  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
226  Dev->getHandleRef(), PiInfoCode<info::device::single_fp_config>::value,
227  sizeof(result), &result, nullptr);
228  return read_fp_bitfield(result);
229  }
230 };
231 
232 // Specialization for queue_profiling. In addition to pi_queue level profiling,
233 // if piGetDeviceAndHostTimer is not supported, command_submit, command_start
234 // and command_end will be calculated on the host. See MFallbackProfiling.
235 template <> struct get_device_info_impl<bool, info::device::queue_profiling> {
236  static bool get(const DeviceImplPtr &Dev) {
237  pi_queue_properties Properties;
238  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
239  Dev->getHandleRef(), PiInfoCode<info::device::queue_profiling>::value,
240  sizeof(Properties), &Properties, nullptr);
241  return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE;
242  }
243 };
244 
245 // Specialization for atomic_memory_order_capabilities, PI returns a bitfield
246 template <>
247 struct get_device_info_impl<std::vector<memory_order>,
248  info::device::atomic_memory_order_capabilities> {
249  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
250  pi_memory_order_capabilities result;
251  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
252  Dev->getHandleRef(),
253  PiInfoCode<info::device::atomic_memory_order_capabilities>::value,
254  sizeof(pi_memory_order_capabilities), &result, nullptr);
255  return readMemoryOrderBitfield(result);
256  }
257 };
258 
259 // Specialization for atomic_fence_order_capabilities, PI returns a bitfield
260 template <>
261 struct get_device_info_impl<std::vector<memory_order>,
262  info::device::atomic_fence_order_capabilities> {
263  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
264  pi_memory_order_capabilities result;
265  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
266  Dev->getHandleRef(),
267  PiInfoCode<info::device::atomic_fence_order_capabilities>::value,
268  sizeof(pi_memory_order_capabilities), &result, nullptr);
269  return readMemoryOrderBitfield(result);
270  }
271 };
272 
273 // Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
274 template <>
275 struct get_device_info_impl<std::vector<memory_scope>,
276  info::device::atomic_memory_scope_capabilities> {
277  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
278  pi_memory_scope_capabilities result;
279  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
280  Dev->getHandleRef(),
281  PiInfoCode<info::device::atomic_memory_scope_capabilities>::value,
282  sizeof(pi_memory_scope_capabilities), &result, nullptr);
283  return readMemoryScopeBitfield(result);
284  }
285 };
286 
287 // Specialization for atomic_fence_scope_capabilities, PI returns a bitfield
288 template <>
289 struct get_device_info_impl<std::vector<memory_scope>,
290  info::device::atomic_fence_scope_capabilities> {
291  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
292  pi_memory_scope_capabilities result;
293  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
294  Dev->getHandleRef(),
295  PiInfoCode<info::device::atomic_fence_scope_capabilities>::value,
296  sizeof(pi_memory_scope_capabilities), &result, nullptr);
297  return readMemoryScopeBitfield(result);
298  }
299 };
300 
301 // Specialization for bf16 math functions
302 template <>
303 struct get_device_info_impl<bool,
304  info::device::ext_oneapi_bfloat16_math_functions> {
305  static bool get(const DeviceImplPtr &Dev) {
306  bool result = false;
307 
308  sycl::detail::pi::PiResult Err =
309  Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
310  Dev->getHandleRef(),
311  PiInfoCode<info::device::ext_oneapi_bfloat16_math_functions>::value,
312  sizeof(result), &result, nullptr);
313  if (Err != PI_SUCCESS) {
314  return false;
315  }
316  return result;
317  }
318 };
319 
320 // Specialization for exec_capabilities, OpenCL returns a bitfield
321 template <>
322 struct get_device_info_impl<std::vector<info::execution_capability>,
323  info::device::execution_capabilities> {
324  static std::vector<info::execution_capability> get(const DeviceImplPtr &Dev) {
325  pi_device_exec_capabilities result;
326  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
327  Dev->getHandleRef(),
328  PiInfoCode<info::device::execution_capabilities>::value, sizeof(result),
329  &result, nullptr);
330  return read_execution_bitfield(result);
331  }
332 };
333 
334 // Specialization for built in kernel identifiers
335 template <>
336 struct get_device_info_impl<std::vector<kernel_id>,
337  info::device::built_in_kernel_ids> {
338  static std::vector<kernel_id> get(const DeviceImplPtr &Dev) {
339  std::string result = Dev->get_device_info_string(
340  PiInfoCode<info::device::built_in_kernels>::value);
341  auto names = split_string(result, ';');
342 
343  std::vector<kernel_id> ids;
344  ids.reserve(names.size());
345  for (const auto &name : names) {
346  ids.push_back(ProgramManager::getInstance().getBuiltInKernelID(name));
347  }
348  return ids;
349  }
350 };
351 
352 // Specialization for built in kernels, splits the string returned by OpenCL
353 template <>
354 struct get_device_info_impl<std::vector<std::string>,
355  info::device::built_in_kernels> {
356  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
357  std::string result = Dev->get_device_info_string(
358  PiInfoCode<info::device::built_in_kernels>::value);
359  return split_string(result, ';');
360  }
361 };
362 
363 // Specialization for extensions, splits the string returned by OpenCL
364 template <>
365 struct get_device_info_impl<std::vector<std::string>,
366  info::device::extensions> {
367  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
368  std::string result =
369  Dev->get_device_info_string(PiInfoCode<info::device::extensions>::value);
370  return split_string(result, ' ');
371  }
372 };
373 
374 static bool is_sycl_partition_property(info::partition_property PP) {
375  switch (PP) {
376  case info::partition_property::no_partition:
377  case info::partition_property::partition_equally:
378  case info::partition_property::partition_by_counts:
379  case info::partition_property::partition_by_affinity_domain:
380  case info::partition_property::ext_intel_partition_by_cslice:
381  return true;
382  }
383  return false;
384 }
385 
386 // Specialization for partition properties, variable OpenCL return size
387 template <>
388 struct get_device_info_impl<std::vector<info::partition_property>,
389  info::device::partition_properties> {
390  static std::vector<info::partition_property> get(const DeviceImplPtr &Dev) {
391  auto info_partition = PiInfoCode<info::device::partition_properties>::value;
392  const auto &Plugin = Dev->getPlugin();
393 
394  size_t resultSize;
395  Plugin->call<PiApiKind::piDeviceGetInfo>(
396  Dev->getHandleRef(), info_partition, 0, nullptr, &resultSize);
397 
398  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
399  if (arrayLength == 0) {
400  return {};
401  }
402  std::unique_ptr<cl_device_partition_property[]> arrayResult(
403  new cl_device_partition_property[arrayLength]);
404  Plugin->call<PiApiKind::piDeviceGetInfo>(Dev->getHandleRef(),
405  info_partition, resultSize,
406  arrayResult.get(), nullptr);
407 
408  std::vector<info::partition_property> result;
409  for (size_t i = 0; i < arrayLength; ++i) {
410  // OpenCL extensions may have partition_properties that
411  // are not yet defined for SYCL (e.g. CL_DEVICE_PARTITION_BY_NAMES_INTEL)
412  info::partition_property pp(
413  static_cast<info::partition_property>(arrayResult[i]));
414  if (is_sycl_partition_property(pp))
415  result.push_back(pp);
416  }
417  return result;
418  }
419 };
420 
421 // Specialization for partition affinity domains, OpenCL returns a bitfield
422 template <>
423 struct get_device_info_impl<std::vector<info::partition_affinity_domain>,
424  info::device::partition_affinity_domains> {
425  static std::vector<info::partition_affinity_domain>
426  get(const DeviceImplPtr &Dev) {
427  pi_device_affinity_domain result;
428  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
429  Dev->getHandleRef(),
430  PiInfoCode<info::device::partition_affinity_domains>::value,
431  sizeof(result), &result, nullptr);
432  return read_domain_bitfield(result);
433  }
434 };
435 
436 // Specialization for partition type affinity domain, OpenCL can return other
437 // partition properties instead
438 template <>
439 struct get_device_info_impl<info::partition_affinity_domain,
440  info::device::partition_type_affinity_domain> {
441  static info::partition_affinity_domain get(const DeviceImplPtr &Dev) {
442  size_t resultSize;
443  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
444  Dev->getHandleRef(),
445  PiInfoCode<info::device::partition_type_affinity_domain>::value, 0,
446  nullptr, &resultSize);
447  if (resultSize != 1) {
448  return info::partition_affinity_domain::not_applicable;
449  }
450  cl_device_partition_property result;
451  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
452  Dev->getHandleRef(),
453  PiInfoCode<info::device::partition_type_affinity_domain>::value,
454  sizeof(result), &result, nullptr);
455  if (result == PI_DEVICE_AFFINITY_DOMAIN_NUMA ||
456  result == PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
457  result == PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
458  result == PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
459  result == PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE) {
460  return info::partition_affinity_domain(result);
461  }
462 
463  return info::partition_affinity_domain::not_applicable;
464  }
465 };
466 
467 // Specialization for partition type
468 template <>
469 struct get_device_info_impl<info::partition_property,
470  info::device::partition_type_property> {
471  static info::partition_property get(const DeviceImplPtr &Dev) {
472  size_t resultSize;
473  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
474  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, 0, nullptr,
475  &resultSize);
476  if (!resultSize)
477  return info::partition_property::no_partition;
478 
479  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
480 
481  std::unique_ptr<cl_device_partition_property[]> arrayResult(
482  new cl_device_partition_property[arrayLength]);
483  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
484  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, resultSize,
485  arrayResult.get(), nullptr);
486  if (!arrayResult[0])
487  return info::partition_property::no_partition;
488  return info::partition_property(arrayResult[0]);
489  }
490 };
491 // Specialization for supported subgroup sizes
492 template <>
493 struct get_device_info_impl<std::vector<size_t>,
494  info::device::sub_group_sizes> {
495  static std::vector<size_t> get(const DeviceImplPtr &Dev) {
496  size_t resultSize = 0;
497  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
498  Dev->getHandleRef(), PiInfoCode<info::device::sub_group_sizes>::value,
499  0, nullptr, &resultSize);
500 
501  std::vector<size_t> result(resultSize / sizeof(size_t));
502  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
503  Dev->getHandleRef(), PiInfoCode<info::device::sub_group_sizes>::value,
504  resultSize, result.data(), nullptr);
505  return result;
506  }
507 };
508 
509 // Specialization for kernel to kernel pipes.
510 // Here we step away from OpenCL, since there is no appropriate cl_device_info
511 // enum for global pipes feature.
512 template <>
513 struct get_device_info_impl<bool, info::device::kernel_kernel_pipe_support> {
514  static bool get(const DeviceImplPtr &Dev) {
515  // We claim that all Intel FPGA devices support the kernel to kernel pipe
516  // feature (at least within the scope of the SYCL_INTEL_data_flow_pipes extension).
517  platform plt =
519  std::string platform_name = plt.get_info<info::platform::name>();
520  if (platform_name == "Intel(R) FPGA Emulation Platform for OpenCL(TM)" ||
521  platform_name == "Intel(R) FPGA SDK for OpenCL(TM)")
522  return true;
523 
524  // TODO: a better way is to query for supported SPIR-V capabilities once
525  // that becomes possible. Also, if a device's backend supports
526  // SPIR-V 1.1 (where the Pipe Storage feature was defined), then it supports
527  // the feature as well.
528  return false;
529  }
530 };
531 
532 template <int Dimensions>
533 range<Dimensions> construct_range(size_t *values) = delete;
534 // Due to the flipping of work group dimensions before kernel launch, the values
535 // should also be reversed.
536 template <> inline range<1> construct_range<1>(size_t *values) {
537  return {values[0]};
538 }
539 template <> inline range<2> construct_range<2>(size_t *values) {
540  return {values[1], values[0]};
541 }
542 template <> inline range<3> construct_range<3>(size_t *values) {
543  return {values[2], values[1], values[0]};
544 }
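// Editor's note (illustrative addition, not part of the original header): given
// the reversal above, a query that fills values[] = {X, Y, Z} yields
// range<3>{Z, Y, X}; e.g. {1024, 256, 64} becomes range<3>{64, 256, 1024},
// mirroring the flipped work-group dimension order mentioned in the comment.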
545 
546 // Specialization for max_work_item_sizes.
547 template <int Dimensions>
548 struct get_device_info_impl<range<Dimensions>,
549  info::device::max_work_item_sizes<Dimensions>> {
550  static range<Dimensions> get(const DeviceImplPtr &Dev) {
551  size_t result[3];
552  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
553  Dev->getHandleRef(),
554  PiInfoCode<info::device::max_work_item_sizes<Dimensions>>::value,
555  sizeof(result), &result, nullptr);
556  return construct_range<Dimensions>(result);
557  }
558 };
559 
559 
560 using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture;
561 
562 // Only for NVIDIA and AMD GPU architectures
563 constexpr std::pair<const char *, oneapi_exp_arch> NvidiaAmdGPUArchitectures[] =
564  {
565  {"5.0", oneapi_exp_arch::nvidia_gpu_sm_50},
566  {"5.2", oneapi_exp_arch::nvidia_gpu_sm_52},
567  {"5.3", oneapi_exp_arch::nvidia_gpu_sm_53},
568  {"6.0", oneapi_exp_arch::nvidia_gpu_sm_60},
569  {"6.1", oneapi_exp_arch::nvidia_gpu_sm_61},
570  {"6.2", oneapi_exp_arch::nvidia_gpu_sm_62},
571  {"7.0", oneapi_exp_arch::nvidia_gpu_sm_70},
572  {"7.2", oneapi_exp_arch::nvidia_gpu_sm_72},
573  {"7.5", oneapi_exp_arch::nvidia_gpu_sm_75},
574  {"8.0", oneapi_exp_arch::nvidia_gpu_sm_80},
575  {"8.6", oneapi_exp_arch::nvidia_gpu_sm_86},
576  {"8.7", oneapi_exp_arch::nvidia_gpu_sm_87},
577  {"8.9", oneapi_exp_arch::nvidia_gpu_sm_89},
578  {"9.0", oneapi_exp_arch::nvidia_gpu_sm_90},
579  {"gfx701", oneapi_exp_arch::amd_gpu_gfx701},
580  {"gfx702", oneapi_exp_arch::amd_gpu_gfx702},
581  {"gfx801", oneapi_exp_arch::amd_gpu_gfx801},
582  {"gfx802", oneapi_exp_arch::amd_gpu_gfx802},
583  {"gfx803", oneapi_exp_arch::amd_gpu_gfx803},
584  {"gfx805", oneapi_exp_arch::amd_gpu_gfx805},
585  {"gfx810", oneapi_exp_arch::amd_gpu_gfx810},
586  {"gfx900", oneapi_exp_arch::amd_gpu_gfx900},
587  {"gfx902", oneapi_exp_arch::amd_gpu_gfx902},
588  {"gfx904", oneapi_exp_arch::amd_gpu_gfx904},
589  {"gfx906", oneapi_exp_arch::amd_gpu_gfx906},
590  {"gfx908", oneapi_exp_arch::amd_gpu_gfx908},
591  {"gfx909", oneapi_exp_arch::amd_gpu_gfx909},
592  {"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a},
593  {"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c},
594  {"gfx940", oneapi_exp_arch::amd_gpu_gfx940},
595  {"gfx941", oneapi_exp_arch::amd_gpu_gfx941},
596  {"gfx942", oneapi_exp_arch::amd_gpu_gfx942},
597  {"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010},
598  {"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011},
599  {"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012},
600  {"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013},
601  {"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030},
602  {"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031},
603  {"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032},
604  {"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033},
605  {"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034},
606  {"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035},
607  {"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036},
608  {"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100},
609  {"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101},
610  {"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102},
611  {"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103},
612  {"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150},
613  {"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151},
614  {"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200},
615  {"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201},
616 };
617 
618 // Only for Intel GPU architectures
619 constexpr std::pair<const int, oneapi_exp_arch> IntelGPUArchitectures[] = {
620  {0x02000000, oneapi_exp_arch::intel_gpu_bdw},
621  {0x02400009, oneapi_exp_arch::intel_gpu_skl},
622  {0x02404009, oneapi_exp_arch::intel_gpu_kbl},
623  {0x02408009, oneapi_exp_arch::intel_gpu_cfl},
624  {0x0240c000, oneapi_exp_arch::intel_gpu_apl},
625  {0x02410000, oneapi_exp_arch::intel_gpu_glk},
626  {0x02414000, oneapi_exp_arch::intel_gpu_whl},
627  {0x02418000, oneapi_exp_arch::intel_gpu_aml},
628  {0x0241c000, oneapi_exp_arch::intel_gpu_cml},
629  {0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
630  {0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
631  {0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
632  {0x03004000, oneapi_exp_arch::intel_gpu_rkl},
633  {0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
634  {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
635  {0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
636  {0x03028000, oneapi_exp_arch::intel_gpu_dg1},
637  {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
638  {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
639  {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
640  {0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
641  {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg},
642 };
643 
644 // Only for Intel CPU architectures
645 constexpr std::pair<const int, oneapi_exp_arch> IntelCPUArchitectures[] = {
646  {8, oneapi_exp_arch::intel_cpu_spr},
647  {9, oneapi_exp_arch::intel_cpu_gnr},
648 };
649 
650 template <>
651 struct get_device_info_impl<ext::oneapi::experimental::architecture,
652  ext::oneapi::experimental::info::device::architecture> {
653  static ext::oneapi::experimental::architecture
654  get(const DeviceImplPtr &Dev) {
655  backend CurrentBackend = Dev->getBackend();
656  if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
657  backend::opencl == CurrentBackend)) {
658  auto MapArchIDToArchName = [](const int arch) {
659  for (const auto &Item : IntelGPUArchitectures) {
660  if (Item.first == arch)
661  return Item.second;
662  }
663  throw sycl::exception(
664  make_error_code(errc::runtime),
665  "The current device architecture is not supported by "
666  "sycl_ext_oneapi_device_architecture.");
667  };
668  uint32_t DeviceIp;
669  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
670  Dev->getHandleRef(),
671  PiInfoCode<
672  ext::oneapi::experimental::info::device::architecture>::value,
673  sizeof(DeviceIp), &DeviceIp, nullptr);
674  return MapArchIDToArchName(DeviceIp);
675  } else if (Dev->is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend ||
676  backend::ext_oneapi_hip == CurrentBackend)) {
677  auto MapArchIDToArchName = [](const char *arch) {
678  for (const auto &Item : NvidiaAmdGPUArchitectures) {
679  if (std::string_view(Item.first) == arch)
680  return Item.second;
681  }
682  throw sycl::exception(
683  make_error_code(errc::runtime),
684  "The current device architecture is not supported by "
685  "sycl_ext_oneapi_device_architecture.");
686  };
687  size_t ResultSize = 0;
688  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
689  Dev->getHandleRef(), PiInfoCode<info::device::version>::value, 0,
690  nullptr, &ResultSize);
691  std::unique_ptr<char[]> DeviceArch(new char[ResultSize]);
692  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
693  Dev->getHandleRef(), PiInfoCode<info::device::version>::value,
694  ResultSize, DeviceArch.get(), nullptr);
695  std::string DeviceArchCopy(DeviceArch.get());
696  std::string DeviceArchSubstr =
697  DeviceArchCopy.substr(0, DeviceArchCopy.find(":"));
698  return MapArchIDToArchName(DeviceArchSubstr.data());
699  } else if (Dev->is_cpu() && backend::opencl == CurrentBackend) {
700  auto MapArchIDToArchName = [](const int arch) {
701  for (const auto &Item : IntelCPUArchitectures) {
702  if (Item.first == arch)
703  return Item.second;
704  }
705  return sycl::ext::oneapi::experimental::architecture::x86_64;
706  };
707  uint32_t DeviceIp;
708  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
709  Dev->getHandleRef(),
710  PiInfoCode<
711  ext::oneapi::experimental::info::device::architecture>::value,
712  sizeof(DeviceIp), &DeviceIp, nullptr);
713  return MapArchIDToArchName(DeviceIp);
714  } // else is not needed
715  // TODO: add support of other architectures by extending with else if
716  // Generating a user-friendly error message
717  std::string DeviceStr;
718  if (Dev->is_gpu())
719  DeviceStr = "GPU";
720  else if (Dev->is_cpu())
721  DeviceStr = "CPU";
722  else if (Dev->is_accelerator())
723  DeviceStr = "accelerator";
724  // else if not needed
725  std::stringstream ErrorMessage;
726  ErrorMessage
727  << "sycl_ext_oneapi_device_architecture feature is not supported on "
728  << DeviceStr << " device with sycl::backend::" << CurrentBackend
729  << " backend.";
730  throw sycl::exception(make_error_code(errc::runtime), ErrorMessage.str());
731  }
732 };
733 
734 template <>
735 struct get_device_info_impl<
736  std::vector<ext::oneapi::experimental::matrix::combination>,
737  ext::oneapi::experimental::info::device::matrix_combinations> {
738  static std::vector<ext::oneapi::experimental::matrix::combination>
739  get(const DeviceImplPtr &Dev) {
740  using namespace ext::oneapi::experimental::matrix;
741  using namespace ext::oneapi::experimental;
742  backend CurrentBackend = Dev->getBackend();
743  auto get_current_architecture = [&Dev]() -> std::optional<architecture> {
744  // This helper lambda ignores all runtime-related exceptions from
745  // querying the device architecture. For instance, if the device architecture
746  // on the user's machine is not supported by
747  // sycl_ext_oneapi_device_architecture, the runtime exception is swallowed
748  // and std::nullopt is returned.
749  try {
750  return get_device_info_impl<
751  architecture,
752  ext::oneapi::experimental::info::device::architecture>::get(Dev);
753  } catch (sycl::exception &e) {
754  if (e.code() != errc::runtime)
755  std::rethrow_exception(std::make_exception_ptr(e));
756  }
757  return std::nullopt;
758  };
759  std::optional<architecture> DeviceArchOpt = get_current_architecture();
760  if (!DeviceArchOpt.has_value())
761  return {};
762  architecture DeviceArch = DeviceArchOpt.value();
763  if (architecture::intel_cpu_spr == DeviceArch)
764  return {
765  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
766  matrix_type::sint32, matrix_type::sint32},
767  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
768  matrix_type::sint32, matrix_type::sint32},
769  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
770  matrix_type::sint32, matrix_type::sint32},
771  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
772  matrix_type::sint32, matrix_type::sint32},
773  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
774  matrix_type::fp32, matrix_type::fp32},
775  };
776  else if (architecture::intel_cpu_gnr == DeviceArch)
777  return {
778  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
779  matrix_type::sint32, matrix_type::sint32},
780  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
781  matrix_type::sint32, matrix_type::sint32},
782  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
783  matrix_type::sint32, matrix_type::sint32},
784  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
785  matrix_type::sint32, matrix_type::sint32},
786  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
787  matrix_type::fp32, matrix_type::fp32},
788  {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
789  matrix_type::fp32, matrix_type::fp32},
790  };
791  else if (architecture::intel_gpu_pvc == DeviceArch)
792  return {
793  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
794  matrix_type::sint32, matrix_type::sint32},
795  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::sint8,
796  matrix_type::sint32, matrix_type::sint32},
797  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::uint8,
798  matrix_type::sint32, matrix_type::sint32},
799  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::sint8,
800  matrix_type::sint32, matrix_type::sint32},
801  {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
802  matrix_type::fp32, matrix_type::fp32},
803  {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
804  matrix_type::fp32, matrix_type::fp32},
805  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
806  matrix_type::fp32, matrix_type::fp32},
807  {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
808  matrix_type::fp32, matrix_type::fp32},
809  {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
810  matrix_type::fp32, matrix_type::fp32},
811  };
812  else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
813  (architecture::intel_gpu_dg2_g11 == DeviceArch) ||
814  (architecture::intel_gpu_dg2_g12 == DeviceArch))
815  return {
816  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::uint8,
817  matrix_type::sint32, matrix_type::sint32},
818  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::sint8,
819  matrix_type::sint32, matrix_type::sint32},
820  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::uint8,
821  matrix_type::sint32, matrix_type::sint32},
822  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::sint8,
823  matrix_type::sint32, matrix_type::sint32},
824  {8, 0, 0, 0, 8, 16, matrix_type::fp16, matrix_type::fp16,
825  matrix_type::fp32, matrix_type::fp32},
826  {8, 0, 0, 0, 8, 16, matrix_type::bf16, matrix_type::bf16,
827  matrix_type::fp32, matrix_type::fp32},
828  };
829  else if (architecture::amd_gpu_gfx90a == DeviceArch)
830  return {
831  {0, 0, 0, 32, 32, 8, matrix_type::fp16, matrix_type::fp16,
832  matrix_type::fp32, matrix_type::fp32},
833  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
834  matrix_type::fp32, matrix_type::fp32},
835  {0, 0, 0, 32, 32, 8, matrix_type::sint8, matrix_type::sint8,
836  matrix_type::sint32, matrix_type::sint32},
837  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
838  matrix_type::sint32, matrix_type::sint32},
839  {0, 0, 0, 32, 32, 8, matrix_type::bf16, matrix_type::bf16,
840  matrix_type::fp32, matrix_type::fp32},
841  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
842  matrix_type::fp32, matrix_type::fp32},
843  {0, 0, 0, 16, 16, 4, matrix_type::fp64, matrix_type::fp64,
844  matrix_type::fp64, matrix_type::fp64},
845  };
846  else if (backend::ext_oneapi_cuda == CurrentBackend) {
847  // TODO: The following can be simplified once comparison of architectures
848  // using < and > is implemented
850  constexpr std::pair<float, oneapi_exp_arch> NvidiaArchNumbs[] = {
851  {5.0, oneapi_exp_arch::nvidia_gpu_sm_50},
852  {5.2, oneapi_exp_arch::nvidia_gpu_sm_52},
853  {5.3, oneapi_exp_arch::nvidia_gpu_sm_53},
854  {6.0, oneapi_exp_arch::nvidia_gpu_sm_60},
855  {6.1, oneapi_exp_arch::nvidia_gpu_sm_61},
856  {6.2, oneapi_exp_arch::nvidia_gpu_sm_62},
857  {7.0, oneapi_exp_arch::nvidia_gpu_sm_70},
858  {7.2, oneapi_exp_arch::nvidia_gpu_sm_72},
859  {7.5, oneapi_exp_arch::nvidia_gpu_sm_75},
860  {8.0, oneapi_exp_arch::nvidia_gpu_sm_80},
861  {8.6, oneapi_exp_arch::nvidia_gpu_sm_86},
862  {8.7, oneapi_exp_arch::nvidia_gpu_sm_87},
863  {8.9, oneapi_exp_arch::nvidia_gpu_sm_89},
864  {9.0, oneapi_exp_arch::nvidia_gpu_sm_90},
865  };
866  auto GetArchNum = [&](const architecture &arch) {
867  for (const auto &Item : NvidiaArchNumbs)
868  if (Item.second == arch)
869  return Item.first;
870  return 0.f;
871  };
872  float ComputeCapability = GetArchNum(DeviceArch);
873  std::vector<combination> sm_70_combinations = {
874  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
875  matrix_type::fp32, matrix_type::fp32},
876  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
877  matrix_type::fp32, matrix_type::fp32},
878  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
879  matrix_type::fp32, matrix_type::fp32},
880  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
881  matrix_type::fp16, matrix_type::fp16},
882  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
883  matrix_type::fp16, matrix_type::fp16},
884  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
885  matrix_type::fp16, matrix_type::fp16},
886  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
887  matrix_type::fp32, matrix_type::fp16},
888  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
889  matrix_type::fp32, matrix_type::fp16},
890  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
891  matrix_type::fp32, matrix_type::fp16},
892  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
893  matrix_type::fp16, matrix_type::fp32},
894  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
895  matrix_type::fp16, matrix_type::fp32},
896  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
897  matrix_type::fp16, matrix_type::fp32}};
898  std::vector<combination> sm_72_combinations = {
899  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
900  matrix_type::sint32, matrix_type::sint32},
901  {0, 0, 0, 8, 32, 16, matrix_type::sint8, matrix_type::sint8,
902  matrix_type::sint32, matrix_type::sint32},
903  {0, 0, 0, 32, 8, 16, matrix_type::sint8, matrix_type::sint8,
904  matrix_type::sint32, matrix_type::sint32},
905  {0, 0, 0, 16, 16, 16, matrix_type::uint8, matrix_type::uint8,
906  matrix_type::sint32, matrix_type::sint32},
907  {0, 0, 0, 8, 32, 16, matrix_type::uint8, matrix_type::uint8,
908  matrix_type::sint32, matrix_type::sint32},
909  {0, 0, 0, 32, 8, 16, matrix_type::uint8, matrix_type::uint8,
910  matrix_type::sint32, matrix_type::sint32}};
911  std::vector<combination> sm_80_combinations = {
912  {0, 0, 0, 16, 16, 8, matrix_type::tf32, matrix_type::tf32,
913  matrix_type::fp32, matrix_type::fp32},
914  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
915  matrix_type::fp32, matrix_type::fp32},
916  {0, 0, 0, 8, 32, 16, matrix_type::bf16, matrix_type::bf16,
917  matrix_type::fp32, matrix_type::fp32},
918  {0, 0, 0, 32, 8, 16, matrix_type::bf16, matrix_type::bf16,
919  matrix_type::fp32, matrix_type::fp32},
920  {0, 0, 0, 8, 8, 4, matrix_type::fp64, matrix_type::fp64,
921  matrix_type::fp64, matrix_type::fp64}};
922  if (ComputeCapability >= 8.0) {
923  sm_80_combinations.insert(sm_80_combinations.end(),
924  sm_72_combinations.begin(),
925  sm_72_combinations.end());
926  sm_80_combinations.insert(sm_80_combinations.end(),
927  sm_70_combinations.begin(),
928  sm_70_combinations.end());
929  return sm_80_combinations;
930  } else if (ComputeCapability >= 7.2) {
931  sm_72_combinations.insert(sm_72_combinations.end(),
932  sm_70_combinations.begin(),
933  sm_70_combinations.end());
934  return sm_72_combinations;
935  } else if (ComputeCapability >= 7.0)
936  return sm_70_combinations;
937  }
938  return {};
939  }
940 };
941 
942 template <>
943 struct get_device_info_impl<
944  size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
945  static size_t get(const DeviceImplPtr) {
946  return static_cast<size_t>((std::numeric_limits<int>::max)());
947  }
948 };
949 template <>
950 struct get_device_info_impl<
951  id<1>, ext::oneapi::experimental::info::device::max_work_groups<1>> {
952  static id<1> get(const DeviceImplPtr &Dev) {
953  size_t result[3];
954  size_t Limit =
955  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
956  max_global_work_groups>::get(Dev);
957  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
958  Dev->getHandleRef(),
959  PiInfoCode<
960  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
961  sizeof(result), &result, nullptr);
962  return id<1>(std::min(Limit, result[0]));
963  }
964 };
965 
966 template <>
967 struct get_device_info_impl<
968  id<2>, ext::oneapi::experimental::info::device::max_work_groups<2>> {
969  static id<2> get(const DeviceImplPtr &Dev) {
970  size_t result[3];
971  size_t Limit =
972  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
973  max_global_work_groups>::get(Dev);
974  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
975  Dev->getHandleRef(),
976  PiInfoCode<
977  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
978  sizeof(result), &result, nullptr);
979  return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0]));
980  }
981 };
982 
983 template <>
984 struct get_device_info_impl<
985  id<3>, ext::oneapi::experimental::info::device::max_work_groups<3>> {
986  static id<3> get(const DeviceImplPtr &Dev) {
987  size_t result[3];
988  size_t Limit =
989  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
990  max_global_work_groups>::get(Dev);
991  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
992  Dev->getHandleRef(),
993  PiInfoCode<
994  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
995  sizeof(result), &result, nullptr);
996  return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
997  std::min(Limit, result[0]));
998  }
999 };
1000 
1001 // TODO:Remove with deprecated feature
1002 // device::get_info<info::device::ext_oneapi_max_global_work_groups>
1003 template <>
1004 struct get_device_info_impl<size_t,
1005  info::device::ext_oneapi_max_global_work_groups> {
1006  static size_t get(const DeviceImplPtr &Dev) {
1007  return get_device_info_impl<size_t,
1008  ext::oneapi::experimental::info::device::
1009  max_global_work_groups>::get(Dev);
1010  }
1011 };
1012 
1013 // TODO:Remove with deprecated feature
1014 // device::get_info<info::device::ext_oneapi_max_work_groups_1d>
1015 template <>
1016 struct get_device_info_impl<id<1>,
1017  info::device::ext_oneapi_max_work_groups_1d> {
1018  static id<1> get(const DeviceImplPtr &Dev) {
1019  return get_device_info_impl<
1020  id<1>,
1021  ext::oneapi::experimental::info::device::max_work_groups<1>>::get(Dev);
1022  }
1023 };
1024 
1025 // TODO:Remove with deprecated feature
1026 // device::get_info<info::device::ext_oneapi_max_work_groups_2d>
1027 template <>
1028 struct get_device_info_impl<id<2>,
1029  info::device::ext_oneapi_max_work_groups_2d> {
1030  static id<2> get(const DeviceImplPtr &Dev) {
1031  return get_device_info_impl<
1032  id<2>,
1033  ext::oneapi::experimental::info::device::max_work_groups<2>>::get(Dev);
1034  }
1035 };
1036 
1037 // TODO:Remove with deprecated feature
1038 // device::get_info<info::device::ext_oneapi_max_work_groups_3d>
1039 template <>
1040 struct get_device_info_impl<id<3>,
1041  info::device::ext_oneapi_max_work_groups_3d> {
1042  static id<3> get(const DeviceImplPtr &Dev) {
1043  return get_device_info_impl<
1044  id<3>,
1045  ext::oneapi::experimental::info::device::max_work_groups<3>>::get(Dev);
1046  }
1047 };
1048 
1049 // Specialization for parent device
1050 template <> struct get_device_info_impl<device, info::device::parent_device> {
1051  static device get(const DeviceImplPtr &Dev) {
1052  typename sycl_to_pi<device>::type result;
1053  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1054  Dev->getHandleRef(), PiInfoCode<info::device::parent_device>::value,
1055  sizeof(result), &result, nullptr);
1056  if (result == nullptr)
1057  throw invalid_object_error(
1058  "No parent for device because it is not a subdevice",
1059  PI_ERROR_INVALID_DEVICE);
1060 
1061  const auto &Platform = Dev->getPlatformImpl();
1062  return createSyclObjFromImpl<device>(
1063  Platform->getOrMakeDeviceImpl(result, Platform));
1064  }
1065 };
1066 
1067 // Specialization for image_support
1068 template <> struct get_device_info_impl<bool, info::device::image_support> {
1069  static bool get(const DeviceImplPtr &) {
1070  // No devices currently support SYCL 2020 images.
1071  return false;
1072  }
1073 };
1074 
1075 // USM
1076 
1077 // Specialization for device usm query.
1078 template <>
1079 struct get_device_info_impl<bool, info::device::usm_device_allocations> {
1080  static bool get(const DeviceImplPtr &Dev) {
1081  pi_usm_capabilities caps;
1082  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1083  Dev->getHandleRef(),
1084  PiInfoCode<info::device::usm_device_allocations>::value,
1085  sizeof(pi_usm_capabilities), &caps, nullptr);
1086 
1087  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1088  }
1089 };
1090 
1091 // Specialization for host usm query.
1092 template <>
1093 struct get_device_info_impl<bool, info::device::usm_host_allocations> {
1094  static bool get(const DeviceImplPtr &Dev) {
1095  pi_usm_capabilities caps;
1096  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1097  Dev->getHandleRef(),
1098  PiInfoCode<info::device::usm_host_allocations>::value,
1099  sizeof(pi_usm_capabilities), &caps, nullptr);
1100 
1101  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1102  }
1103 };
1104 
1105 // Specialization for shared usm query.
1106 template <>
1107 struct get_device_info_impl<bool, info::device::usm_shared_allocations> {
1108  static bool get(const DeviceImplPtr &Dev) {
1109  pi_usm_capabilities caps;
1110  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1111  Dev->getHandleRef(),
1112  PiInfoCode<info::device::usm_shared_allocations>::value,
1113  sizeof(pi_usm_capabilities), &caps, nullptr);
1114  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1115  }
1116 };
1117 
1118 // Specialization for restricted usm query
1119 template <>
1120 struct get_device_info_impl<bool,
1121  info::device::usm_restricted_shared_allocations> {
1122  static bool get(const DeviceImplPtr &Dev) {
1123  pi_usm_capabilities caps;
1124  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1125  Dev->getHandleRef(),
1126  PiInfoCode<info::device::usm_restricted_shared_allocations>::value,
1127  sizeof(pi_usm_capabilities), &caps, nullptr);
1128  // Check that we don't support any cross device sharing
1129  return (Err != PI_SUCCESS)
1130  ? false
1131  : !(caps & (PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS));
1132  }
1133 };
1134 
1135 // Specialization for system usm query
1136 template <>
1137 struct get_device_info_impl<bool, info::device::usm_system_allocations> {
1138  static bool get(const DeviceImplPtr &Dev) {
1139  pi_usm_capabilities caps;
1140  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1141  Dev->getHandleRef(),
1142  PiInfoCode<info::device::usm_system_allocations>::value,
1143  sizeof(pi_usm_capabilities), &caps, nullptr);
1144  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1145  }
1146 };
1147 
1148 // Specialization for kernel fusion support
1149 template <>
1150 struct get_device_info_impl<
1151  bool, ext::codeplay::experimental::info::device::supports_fusion> {
1152  static bool get(const DeviceImplPtr &Dev) {
1153 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1154  // Currently fusion is only supported for SPIR-V based backends,
1155  // CUDA and HIP.
1156  if (Dev->getBackend() == backend::opencl) {
1157  // Exclude all non-CPU or non-GPU devices on OpenCL, in particular
1158  // accelerators.
1159  return Dev->is_cpu() || Dev->is_gpu();
1160  }
1161 
1162  return (Dev->getBackend() == backend::ext_oneapi_level_zero) ||
1163  (Dev->getBackend() == backend::ext_oneapi_cuda) ||
1164  (Dev->getBackend() == backend::ext_oneapi_hip);
1165 #else // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1166  (void)Dev;
1167  return false;
1168 #endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1169  }
1170 };
1171 
1172 // Specialization for max registers per work-group
1173 template <>
1174 struct get_device_info_impl<
1175  uint32_t,
1176  ext::codeplay::experimental::info::device::max_registers_per_work_group> {
1177  static uint32_t get(const DeviceImplPtr &Dev) {
1178  uint32_t maxRegsPerWG;
1179  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1180  Dev->getHandleRef(),
1181  PiInfoCode<ext::codeplay::experimental::info::device::
1182  max_registers_per_work_group>::value,
1183  sizeof(maxRegsPerWG), &maxRegsPerWG, nullptr);
1184  return maxRegsPerWG;
1185  }
1186 };
1187 
1188 // Specialization for composite devices extension.
1189 template <>
1190 struct get_device_info_impl<
1191  std::vector<sycl::device>,
1192  ext::oneapi::experimental::info::device::component_devices> {
1193  static std::vector<sycl::device> get(const DeviceImplPtr &Dev) {
1194  if (Dev->getBackend() != backend::ext_oneapi_level_zero)
1195  return {};
1196  size_t ResultSize = 0;
1197  // First call to get DevCount.
1198  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1199  Dev->getHandleRef(),
1200  PiInfoCode<
1201  ext::oneapi::experimental::info::device::component_devices>::value,
1202  0, nullptr, &ResultSize);
1203  size_t DevCount = ResultSize / sizeof(pi_device);
1204  // Second call to get the list.
1205  std::vector<pi_device> Devs(DevCount);
1206  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1207  Dev->getHandleRef(),
1208  PiInfoCode<
1209  ext::oneapi::experimental::info::device::component_devices>::value,
1210  ResultSize, Devs.data(), nullptr);
1211  std::vector<sycl::device> Result;
1212  const auto &Platform = Dev->getPlatformImpl();
1213  for (const auto &d : Devs)
1214  Result.push_back(createSyclObjFromImpl<device>(
1215  Platform->getOrMakeDeviceImpl(d, Platform)));
1216 
1217  return Result;
1218  }
1219 };
1220 template <>
1221 struct get_device_info_impl<
1222  sycl::device, ext::oneapi::experimental::info::device::composite_device> {
1223  static sycl::device get(const DeviceImplPtr &Dev) {
1224  if (Dev->getBackend() != backend::ext_oneapi_level_zero)
1225  return {};
1226  if (!Dev->has(sycl::aspect::ext_oneapi_is_component))
1227  throw sycl::exception(make_error_code(errc::invalid),
1228  "Only devices with aspect::ext_oneapi_is_component "
1229  "can call this function.");
1230 
1231  typename sycl_to_pi<device>::type Result;
1232  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1233  Dev->getHandleRef(),
1234  PiInfoCode<
1235  ext::oneapi::experimental::info::device::composite_device>::value,
1236  sizeof(Result), &Result, nullptr);
1237 
1238  if (Result) {
1239  const auto &Platform = Dev->getPlatformImpl();
1240  return createSyclObjFromImpl<device>(
1241  Platform->getOrMakeDeviceImpl(Result, Platform));
1242  }
1243  throw sycl::exception(make_error_code(errc::invalid),
1244  "A component with aspect::ext_oneapi_is_component "
1245  "must have a composite device.");
1246  }
1247 };
1248 
1249 template <typename Param>
1250 typename Param::return_type get_device_info(const DeviceImplPtr &Dev) {
1251  static_assert(is_device_info_desc<Param>::value,
1252  "Invalid device information descriptor");
1253  if (std::is_same<Param,
1254  sycl::_V1::ext::intel::info::device::free_memory>::value) {
1255  if (!Dev->has(aspect::ext_intel_free_memory))
1256  throw invalid_object_error(
1257  "The device does not have the ext_intel_free_memory aspect",
1258  PI_ERROR_INVALID_DEVICE);
1259  }
1260  return get_device_info_impl<typename Param::return_type, Param>::get(Dev);
1261 }
1262 
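// Editor's note (illustrative addition, not part of the original header):
// get_device_info above is the dispatch point for device queries; a typical
// use, assuming a valid DeviceImplPtr named DevImpl, would be roughly:
//
//   std::string Name = get_device_info<info::device::name>(DevImpl);
//   auto SGSizes = get_device_info<info::device::sub_group_sizes>(DevImpl);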
1263 // SYCL host device information
1264 
1265 // Default template is disabled, all possible instantiations are
1266 // specified explicitly.
1267 template <typename Param>
1268 inline typename Param::return_type get_device_info_host() = delete;
1269 
1270 template <>
1271 inline std::vector<sycl::aspect> get_device_info_host<info::device::aspects>() {
1272  return std::vector<sycl::aspect>();
1273 }
1274 
1275 template <>
1276 inline ext::oneapi::experimental::architecture
1277 get_device_info_host<ext::oneapi::experimental::info::device::architecture>() {
1278  return ext::oneapi::experimental::architecture::x86_64;
1279 }
1280 
1281 template <>
1282 inline info::device_type get_device_info_host<info::device::device_type>() {
1283  return info::device_type::host;
1284 }
1285 
1286 template <> inline uint32_t get_device_info_host<info::device::vendor_id>() {
1287  return 0x8086;
1288 }
1289 
1290 template <>
1291 inline uint32_t get_device_info_host<info::device::max_compute_units>() {
1292  return std::thread::hardware_concurrency();
1293 }
1294 
1295 template <>
1296 inline uint32_t get_device_info_host<info::device::max_work_item_dimensions>() {
1297  return 3;
1298 }
1299 
1300 template <>
1301 inline range<1> get_device_info_host<info::device::max_work_item_sizes<1>>() {
1302  // current value is the required minimum
1303  return {1};
1304 }
1305 
1306 template <>
1307 inline range<2> get_device_info_host<info::device::max_work_item_sizes<2>>() {
1308  // current value is the required minimum
1309  return {1, 1};
1310 }
1311 
1312 template <>
1313 inline range<3> get_device_info_host<info::device::max_work_item_sizes<3>>() {
1314  // current value is the required minimum
1315  return {1, 1, 1};
1316 }
1317 
1318 template <>
1319 inline constexpr size_t get_device_info_host<
1320  ext::oneapi::experimental::info::device::max_global_work_groups>() {
1321  // See handler.hpp for the maximum value :
1322  return static_cast<size_t>((std::numeric_limits<int>::max)());
1323 }
1324 
1325 template <>
1326 inline id<1> get_device_info_host<
1327  ext::oneapi::experimental::info::device::max_work_groups<1>>() {
1328  // See handler.hpp for the maximum value :
1329  static constexpr size_t Limit = get_device_info_host<
1330  ext::oneapi::experimental::info::device::max_global_work_groups>();
1331  return {Limit};
1332 }
1333 
1334 template <>
1335 inline id<2> get_device_info_host<
1336  ext::oneapi::experimental::info::device::max_work_groups<2>>() {
1337  // See handler.hpp for the maximum value :
1338  static constexpr size_t Limit = get_device_info_host<
1339  ext::oneapi::experimental::info::device::max_global_work_groups>();
1340  return {Limit, Limit};
1341 }
1342 
1343 template <>
1344 inline id<3> get_device_info_host<
1345  ext::oneapi::experimental::info::device::max_work_groups<3>>() {
1346  // See handler.hpp for the maximum value :
1347  static constexpr size_t Limit = get_device_info_host<
1348  ext::oneapi::experimental::info::device::max_global_work_groups>();
1349  return {Limit, Limit, Limit};
1350 }
1351 
1352 // TODO:remove with deprecated feature
1353 // device::get_info<info::device::ext_oneapi_max_global_work_groups>
1354 template <>
1355 inline constexpr size_t
1356 get_device_info_host<info::device::ext_oneapi_max_global_work_groups>() {
1357  return get_device_info_host<
1358  ext::oneapi::experimental::info::device::max_global_work_groups>();
1359 }
1360 
1361 // TODO:remove with deprecated feature
1362 // device::get_info<info::device::ext_oneapi_max_work_groups_1d>
1363 template <>
1364 inline id<1>
1365 get_device_info_host<info::device::ext_oneapi_max_work_groups_1d>() {
1366 
1367  return get_device_info_host<
1368  ext::oneapi::experimental::info::device::max_work_groups<1>>();
1369 }
1370 
1371 // TODO:remove with deprecated feature
1372 // device::get_info<info::device::ext_oneapi_max_work_groups_2d>
1373 template <>
1374 inline id<2>
1375 get_device_info_host<info::device::ext_oneapi_max_work_groups_2d>() {
1376  return get_device_info_host<
1377  ext::oneapi::experimental::info::device::max_work_groups<2>>();
1378 }
1379 
1380 // TODO:remove with deprecated feature
1381 // device::get_info<info::device::ext_oneapi_max_work_groups_3d>
1382 template <>
1383 inline id<3>
1384 get_device_info_host<info::device::ext_oneapi_max_work_groups_3d>() {
1385  return get_device_info_host<
1386  ext::oneapi::experimental::info::device::max_work_groups<3>>();
1387 }
1388 
1389 template <>
1390 inline size_t get_device_info_host<info::device::max_work_group_size>() {
1391  // current value is the required minimum
1392  return 1;
1393 }
1394 
1395 template <>
1396 inline uint32_t
1397 get_device_info_host<info::device::preferred_vector_width_char>() {
1398  // TODO update when appropriate
1399  return 1;
1400 }
1401 
1402 template <>
1403 inline uint32_t
1404 get_device_info_host<info::device::preferred_vector_width_short>() {
1405  // TODO update when appropriate
1406  return 1;
1407 }
1408 
1409 template <>
1410 inline uint32_t
1411 get_device_info_host<info::device::preferred_vector_width_int>() {
1412  // TODO update when appropriate
1413  return 1;
1414 }
1415 
1416 template <>
1417 inline uint32_t
1418 get_device_info_host<info::device::preferred_vector_width_long>() {
1419  // TODO update when appropriate
1420  return 1;
1421 }
1422 
1423 template <>
1424 inline uint32_t
1425 get_device_info_host<info::device::preferred_vector_width_float>() {
1426  // TODO update when appropriate
1427  return 1;
1428 }
1429 
1430 template <>
1431 inline uint32_t
1432 get_device_info_host<info::device::preferred_vector_width_double>() {
1433  // TODO update when appropriate
1434  return 1;
1435 }
1436 
1437 template <>
1438 inline uint32_t
1439 get_device_info_host<info::device::preferred_vector_width_half>() {
1440  // TODO update when appropriate
1441  return 0;
1442 }
1443 
1444 template <>
1445 inline uint32_t get_device_info_host<info::device::native_vector_width_char>() {
1446  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Char);
1447 }
1448 
1449 template <>
1450 inline uint32_t
1451 get_device_info_host<info::device::native_vector_width_short>() {
1452  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Short);
1453 }
1454 
1455 template <>
1456 inline uint32_t get_device_info_host<info::device::native_vector_width_int>() {
1457  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Int);
1458 }
1459 
1460 template <>
1461 inline uint32_t get_device_info_host<info::device::native_vector_width_long>() {
1462  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Long);
1463 }
1464 
1465 template <>
1466 inline uint32_t
1467 get_device_info_host<info::device::native_vector_width_float>() {
1468  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Float);
1469 }
1470 
1471 template <>
1472 inline uint32_t
1473 get_device_info_host<info::device::native_vector_width_double>() {
1474  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Double);
1475 }
1476 
1477 template <>
1478 inline uint32_t get_device_info_host<info::device::native_vector_width_half>() {
1479  return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Half);
1480 }
1481 
1482 template <>
1483 inline uint32_t get_device_info_host<info::device::max_clock_frequency>() {
1484  return PlatformUtil::getMaxClockFrequency();
1485 }
1486 
1487 template <> inline uint32_t get_device_info_host<info::device::address_bits>() {
1488  return sizeof(void *) * 8;
1489 }
1490 
1491 template <>
1492 inline uint64_t get_device_info_host<info::device::global_mem_size>() {
1493  return static_cast<uint64_t>(OSUtil::getOSMemSize());
1494 }
1495 
1496 template <>
1497 inline uint64_t get_device_info_host<info::device::max_mem_alloc_size>() {
1498  // current value is the required minimum
1499  const uint64_t a = get_device_info_host<info::device::global_mem_size>() / 4;
1500  const uint64_t b = 128ul * 1024 * 1024;
1501  return (a > b) ? a : b;
1502 }
1503 
1504 template <> inline bool get_device_info_host<info::device::image_support>() {
1505  return true;
1506 }
1507 
1508 template <> inline bool get_device_info_host<info::device::atomic64>() {
1509  return false;
1510 }
1511 
1512 template <>
1513 inline std::vector<memory_order>
1514 get_device_info_host<info::device::atomic_memory_order_capabilities>() {
1515  return {memory_order::relaxed, memory_order::acquire, memory_order::release,
1516  memory_order::acq_rel, memory_order::seq_cst};
1517 }
1518 
1519 template <>
1520 inline std::vector<memory_order>
1521 get_device_info_host<info::device::atomic_fence_order_capabilities>() {
1522  return {memory_order::relaxed, memory_order::acquire, memory_order::release,
1523  memory_order::acq_rel, memory_order::seq_cst};
1524 }
1525 
1526 template <>
1527 inline std::vector<memory_scope>
1528 get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
1529  return {memory_scope::work_item, memory_scope::sub_group,
1530  memory_scope::work_group, memory_scope::device, memory_scope::system};
1531 }
1532 
1533 template <>
1534 inline std::vector<memory_scope>
1535 get_device_info_host<info::device::atomic_fence_scope_capabilities>() {
1536  return {memory_scope::work_item, memory_scope::sub_group,
1537  memory_scope::work_group, memory_scope::device, memory_scope::system};
1538 }
1539 
1540 template <>
1541 inline bool
1542 get_device_info_host<info::device::ext_oneapi_bfloat16_math_functions>() {
1543  return false;
1544 }
1545 
1546 template <>
1547 inline uint32_t get_device_info_host<info::device::max_read_image_args>() {
1548  // current value is the required minimum
1549  return 128;
1550 }
1551 
1552 template <>
1553 inline uint32_t get_device_info_host<info::device::max_write_image_args>() {
1554  // current value is the required minimum
1555  return 8;
1556 }
1557 
1558 template <>
1559 inline size_t get_device_info_host<info::device::image2d_max_width>() {
1560  // SYCL guarantees at least 8192. Some devices already known to provide more
1561  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1562  // image object allocation on host.
1563  // Using any fixed number (i.e. 16384) brings the risk of having similar
1564  // issues on newer devices in future. Thus it does not make sense limiting
1565  // the returned value on host. Practically speaking, the returned value on host
1566  // depends only on memory required for the image, which also depends on
1567  // the image channel_type and the image height. Both are not known in this
1568  // query, thus it becomes user's responsibility to choose proper image
1569  // parameters depending on similar query to (non-host device) and amount
1570  // of available/allocatable memory.
1571  return std::numeric_limits<std::size_t>::max();
1572 }
1573 
1574 template <>
1575 inline size_t get_device_info_host<info::device::image2d_max_height>() {
1576  // SYCL guarantees at least 8192. Some devices already known to provide more
1577  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1578  // image object allocation on host.
1579  // Using any fixed number (i.e. 16384) brings the risk of having similar
1580  // issues on newer devices in future. Thus it does not make sense limiting
1581  // the returned value on host. Practically speaking, the returned value on host
1582  // depends only on memory required for the image, which also depends on
1583  // the image channel_type and the image width. Both are not known in this
1584  // query, thus it becomes user's responsibility to choose proper image
1585  // parameters depending on similar query to (non-host device) and amount
1586  // of available/allocatable memory.
1587  return std::numeric_limits<std::size_t>::max();
1588 }
1589 
1590 template <>
1591 inline size_t get_device_info_host<info::device::image3d_max_width>() {
1592  // SYCL guarantees at least 8192. Some devices already known to provide more
1593  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1594  // image object allocation on host.
1595  // Using any fixed number (i.e. 16384) brings the risk of having similar
1596  // issues on newer devices in future. Thus it does not make sense limiting
1597  // the returned value on host. Practically speaking, the returned value on host
1598  // depends only on memory required for the image, which also depends on
1599  // the image channel_type and the image height/depth. Both are not known
1600  // in this query, thus it becomes user's responsibility to choose proper image
1601  // parameters depending on similar query to (non-host device) and amount
1602  // of available/allocatable memory.
1603  return std::numeric_limits<std::size_t>::max();
1604 }
1605 
1606 template <>
1607 inline size_t get_device_info_host<info::device::image3d_max_height>() {
1608  // SYCL guarantees at least 8192. Some devices are already known to provide
1609  // more than that (e.g. 16384 for opencl:gpu), which may create issues during
1610  // image object allocation on host.
1611  // Using any fixed number (e.g. 16384) brings the risk of hitting similar
1612  // issues on newer devices in the future, so it does not make sense to limit
1613  // the returned value on host. Practically speaking, the returned value on
1614  // host depends only on the memory required for the image, which in turn
1615  // depends on the image channel_type and the image width/depth. These are
1616  // not known in this query, so it is the user's responsibility to choose
1617  // proper image parameters based on the same query on a (non-host) device and
1618  // the amount of available/allocatable memory.
1619  return std::numeric_limits<std::size_t>::max();
1620 }
1621 
1622 template <>
1623 inline size_t get_device_info_host<info::device::image3d_max_depth>() {
1624  // SYCL guarantees at least 8192. Some devices are already known to provide
1625  // more than that (e.g. 16384 for opencl:gpu), which may create issues during
1626  // image object allocation on host.
1627  // Using any fixed number (e.g. 16384) brings the risk of hitting similar
1628  // issues on newer devices in the future, so it does not make sense to limit
1629  // the returned value on host. Practically speaking, the returned value on
1630  // host depends only on the memory required for the image, which in turn
1631  // depends on the image channel_type and the image height/width. These are
1632  // not known in this query, so it is the user's responsibility to choose
1633  // proper image parameters based on the same query on a (non-host) device and
1634  // the amount of available/allocatable memory.
1635  return std::numeric_limits<std::size_t>::max();
1636 }
1637 
1638 template <>
1639 inline size_t get_device_info_host<info::device::image_max_buffer_size>() {
1640  // Not supported in SYCL
1641  return 0;
1642 }
1643 
1644 template <>
1645 inline size_t get_device_info_host<info::device::image_max_array_size>() {
1646  // current value is the required minimum
1647  return 2048;
1648 }
1649 
1650 template <> inline uint32_t get_device_info_host<info::device::max_samplers>() {
1651  // current value is the required minimum
1652  return 16;
1653 }
1654 
1655 template <>
1656 inline size_t get_device_info_host<info::device::max_parameter_size>() {
1657  // current value is the required minimum
1658  return 1024;
1659 }
1660 
1661 template <>
1662 inline uint32_t get_device_info_host<info::device::mem_base_addr_align>() {
1663  return 1024;
1664 }
1665 
1666 template <>
1667 inline std::vector<info::fp_config>
1668 get_device_info_host<info::device::half_fp_config>() {
1669  // current value is the required minimum
1670  return {};
1671 }
1672 
1673 template <>
1674 inline std::vector<info::fp_config>
1675 get_device_info_host<info::device::single_fp_config>() {
1676  // current value is the required minimum
1677  return {info::fp_config::round_to_nearest, info::fp_config::inf_nan};
1678 }
1679 
1680 template <>
1681 inline std::vector<info::fp_config>
1682 get_device_info_host<info::device::double_fp_config>() {
1683  // current value is the required minimum
1684  return {info::fp_config::fma, info::fp_config::round_to_nearest,
1685  info::fp_config::round_to_zero, info::fp_config::round_to_inf,
1686  info::fp_config::inf_nan, info::fp_config::denorm};
1687 }
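// Illustrative sketch (not part of the original header): fp_config queries
// return a vector, so callers typically search it for a specific capability,
// e.g. checking whether double-precision FMA is reported on a device Dev
// (requires <algorithm>):
//
//   auto DblCfg = Dev.get_info<sycl::info::device::double_fp_config>();
//   bool HasFma = std::find(DblCfg.begin(), DblCfg.end(),
//                           sycl::info::fp_config::fma) != DblCfg.end();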
1688 
1689 template <>
1690 inline info::global_mem_cache_type
1691 get_device_info_host<info::device::global_mem_cache_type>() {
1692  return info::global_mem_cache_type::read_write;
1693 }
1694 
1695 template <>
1696 inline uint32_t
1697 get_device_info_host<info::device::global_mem_cache_line_size>() {
1698  return PlatformUtil::getMemCacheLineSize();
1699 }
1700 
1701 template <>
1702 inline uint64_t get_device_info_host<info::device::global_mem_cache_size>() {
1703  return PlatformUtil::getMemCacheSize();
1704 }
1705 
1706 template <>
1707 inline uint64_t get_device_info_host<info::device::max_constant_buffer_size>() {
1708  // current value is the required minimum
1709  return 64 * 1024;
1710 }
1711 
1712 template <>
1713 inline uint32_t get_device_info_host<info::device::max_constant_args>() {
1714  // current value is the required minimum
1715  return 8;
1716 }
1717 
1718 template <>
1719 inline info::local_mem_type
1720 get_device_info_host<info::device::local_mem_type>() {
1721  return info::local_mem_type::global;
1722 }
1723 
1724 template <>
1725 inline uint64_t get_device_info_host<info::device::local_mem_size>() {
1726  // current value is the required minimum
1727  return 32 * 1024;
1728 }
1729 
1730 template <>
1731 inline bool get_device_info_host<info::device::error_correction_support>() {
1732  return false;
1733 }
1734 
1735 template <>
1736 inline bool get_device_info_host<info::device::host_unified_memory>() {
1737  return true;
1738 }
1739 
1740 template <>
1741 inline size_t get_device_info_host<info::device::profiling_timer_resolution>() {
1742  typedef std::ratio_divide<std::chrono::high_resolution_clock::period,
1743  std::nano>
1744  ns_period;
1745  return ns_period::num / ns_period::den;
1746 }
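// The ratio_divide above expresses the host clock's period in nanoseconds:
// for a high_resolution_clock with a 1 ns period (ratio<1, 1000000000>),
// dividing by std::nano yields ratio<1, 1>, so num / den == 1, i.e. a 1 ns
// timer resolution. A coarser clock period reports a proportionally larger
// value.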
1747 
1748 template <> inline bool get_device_info_host<info::device::is_endian_little>() {
1749  union {
1750  uint16_t a;
1751  uint8_t b[2];
1752  } u = {0x0100};
1753 
1754  return u.b[1];
1755 }
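// The union above stores 0x0100 into a 16-bit integer and inspects its raw
// bytes: on a little-endian host the most significant byte (0x01) lands in
// b[1], so the function returns true; on a big-endian host b[1] is 0x00 and
// it returns false.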
1756 
1757 template <> inline bool get_device_info_host<info::device::is_available>() {
1758  return true;
1759 }
1760 
1761 template <>
1762 inline bool get_device_info_host<info::device::is_compiler_available>() {
1763  return true;
1764 }
1765 
1766 template <>
1767 inline bool get_device_info_host<info::device::is_linker_available>() {
1768  return true;
1769 }
1770 
1771 template <>
1772 inline std::vector<info::execution_capability>
1773 get_device_info_host<info::device::execution_capabilities>() {
1774  return {info::execution_capability::exec_kernel};
1775 }
1776 
1777 template <> inline bool get_device_info_host<info::device::queue_profiling>() {
1778  return true;
1779 }
1780 
1781 template <>
1782 inline std::vector<kernel_id>
1783 get_device_info_host<info::device::built_in_kernel_ids>() {
1784  return {};
1785 }
1786 
1787 template <>
1788 inline std::vector<std::string>
1789 get_device_info_host<info::device::built_in_kernels>() {
1790  return {};
1791 }
1792 
1793 template <> inline platform get_device_info_host<info::device::platform>() {
1794  return createSyclObjFromImpl<platform>(platform_impl::getHostPlatformImpl());
1795 }
1796 
1797 template <> inline std::string get_device_info_host<info::device::name>() {
1798  return "SYCL host device";
1799 }
1800 
1801 template <> inline std::string get_device_info_host<info::device::vendor>() {
1802  return "";
1803 }
1804 
1805 template <>
1806 inline std::string get_device_info_host<info::device::driver_version>() {
1807  return "1.2";
1808 }
1809 
1810 template <> inline std::string get_device_info_host<info::device::profile>() {
1811  return "FULL PROFILE";
1812 }
1813 
1814 template <> inline std::string get_device_info_host<info::device::version>() {
1815  return "1.2";
1816 }
1817 
1818 template <>
1819 inline std::string get_device_info_host<info::device::opencl_c_version>() {
1820  return "not applicable";
1821 }
1822 
1823 template <>
1824 inline std::vector<std::string>
1825 get_device_info_host<info::device::extensions>() {
1826  // TODO update when appropriate
1827  return {};
1828 }
1829 
1830 template <>
1831 inline size_t get_device_info_host<info::device::printf_buffer_size>() {
1832  // current value is the required minimum
1833  return 1024 * 1024;
1834 }
1835 
1836 template <>
1837 inline bool get_device_info_host<info::device::preferred_interop_user_sync>() {
1838  return false;
1839 }
1840 
1841 template <> inline device get_device_info_host<info::device::parent_device>() {
1842  throw invalid_object_error(
1843  "Partitioning to subdevices of the host device is not implemented",
1844  PI_ERROR_INVALID_DEVICE);
1845 }
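// Illustrative sketch (not part of the original header): parent_device is only
// meaningful for sub-devices, and the query throws on a root device, so callers
// typically guard it; one possible guard, assuming a device Dev:
//
//   if (Dev.get_info<sycl::info::device::partition_type_property>() !=
//       sycl::info::partition_property::no_partition)
//     sycl::device Parent = Dev.get_info<sycl::info::device::parent_device>();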
1846 
1847 template <>
1848 inline uint32_t
1849 get_device_info_host<info::device::partition_max_sub_devices>() {
1850  // TODO update once subdevice creation is enabled
1851  return 1;
1852 }
1853 
1854 template <>
1855 inline std::vector<info::partition_property>
1856 get_device_info_host<info::device::partition_properties>() {
1857  // TODO update once subdevice creation is enabled
1858  return {};
1859 }
1860 
1861 template <>
1862 inline std::vector<info::partition_affinity_domain>
1863 get_device_info_host<info::device::partition_affinity_domains>() {
1864  // TODO update once subdevice creation is enabled
1865  return {};
1866 }
1867 
1868 template <>
1869 inline info::partition_property
1870 get_device_info_host<info::device::partition_type_property>() {
1871  return info::partition_property::no_partition;
1872 }
1873 
1874 template <>
1875 inline info::partition_affinity_domain
1876 get_device_info_host<info::device::partition_type_affinity_domain>() {
1877  // TODO update once subdevice creation is enabled
1878  return info::partition_affinity_domain::not_applicable;
1879 }
1880 
1881 template <>
1882 inline uint32_t get_device_info_host<info::device::reference_count>() {
1883  // TODO update once subdevice creation is enabled
1884  return 1;
1885 }
1886 
1887 template <>
1888 inline uint32_t get_device_info_host<info::device::max_num_sub_groups>() {
1889  // TODO update once subgroups are enabled
1890  throw runtime_error("Sub-group feature is not supported on HOST device.",
1891  PI_ERROR_INVALID_DEVICE);
1892 }
1893 
1894 template <>
1895 inline std::vector<size_t>
1896 get_device_info_host<info::device::sub_group_sizes>() {
1897  // TODO update once subgroups are enabled
1898  throw runtime_error("Sub-group feature is not supported on HOST device.",
1899  PI_ERROR_INVALID_DEVICE);
1900 }
1901 
1902 template <>
1903 inline bool
1904 get_device_info_host<info::device::sub_group_independent_forward_progress>() {
1905  // TODO update once subgroups are enabled
1906  throw runtime_error("Sub-group feature is not supported on HOST device.",
1907  PI_ERROR_INVALID_DEVICE);
1908 }
1909 
1910 template <>
1911 inline bool get_device_info_host<info::device::kernel_kernel_pipe_support>() {
1912  return false;
1913 }
1914 
1915 template <>
1916 inline std::string get_device_info_host<info::device::backend_version>() {
1917  throw runtime_error(
1918  "Backend version feature is not supported on HOST device.",
1919  PI_ERROR_INVALID_DEVICE);
1920 }
1921 
1922 template <>
1923 inline bool get_device_info_host<info::device::usm_device_allocations>() {
1924  return true;
1925 }
1926 
1927 template <>
1928 inline bool get_device_info_host<info::device::usm_host_allocations>() {
1929  return true;
1930 }
1931 
1932 template <>
1933 inline bool get_device_info_host<info::device::usm_shared_allocations>() {
1934  return true;
1935 }
1936 
1937 template <>
1938 inline bool
1939 get_device_info_host<info::device::usm_restricted_shared_allocations>() {
1940  return true;
1941 }
1942 
1943 template <>
1944 inline bool get_device_info_host<info::device::usm_system_allocations>() {
1945  return true;
1946 }
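// Illustrative sketch (not part of the original header): applications usually
// gate USM usage on these queries (or the equivalent aspects) before
// allocating, e.g. for device allocations on a queue Q:
//
//   if (Q.get_device().get_info<sycl::info::device::usm_device_allocations>()) {
//     int *Ptr = sycl::malloc_device<int>(1024, Q);
//     // ... submit kernels using Ptr ...
//     sycl::free(Ptr, Q);
//   }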
1947 
1948 template <>
1949 inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
1950  return false;
1951 }
1952 
1953 // Specializations for intel extensions for Level Zero low-level
1954 // detail device descriptors (not supported on host).
1955 template <>
1956 inline uint32_t get_device_info_host<ext::intel::info::device::device_id>() {
1957  throw runtime_error("Obtaining the device ID is not supported on HOST device",
1958  PI_ERROR_INVALID_DEVICE);
1959 }
1960 template <>
1961 inline std::string
1962 get_device_info_host<ext::intel::info::device::pci_address>() {
1963  throw runtime_error(
1964  "Obtaining the PCI address is not supported on HOST device",
1965  PI_ERROR_INVALID_DEVICE);
1966 }
1967 template <>
1968 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_eu_count>() {
1969  throw runtime_error("Obtaining the EU count is not supported on HOST device",
1970  PI_ERROR_INVALID_DEVICE);
1971 }
1972 template <>
1973 inline uint32_t
1974 get_device_info_host<ext::intel::info::device::gpu_eu_simd_width>() {
1975  throw runtime_error(
1976  "Obtaining the EU SIMD width is not supported on HOST device",
1977  PI_ERROR_INVALID_DEVICE);
1978 }
1979 template <>
1980 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_slices>() {
1981  throw runtime_error(
1982  "Obtaining the number of slices is not supported on HOST device",
1983  PI_ERROR_INVALID_DEVICE);
1984 }
1985 template <>
1986 inline uint32_t
1987 get_device_info_host<ext::intel::info::device::gpu_subslices_per_slice>() {
1988  throw runtime_error("Obtaining the number of subslices per slice is not "
1989  "supported on HOST device",
1990  PI_ERROR_INVALID_DEVICE);
1991 }
1992 template <>
1993 inline uint32_t
1994 get_device_info_host<ext::intel::info::device::gpu_eu_count_per_subslice>() {
1995  throw runtime_error(
1996  "Obtaining the EU count per subslice is not supported on HOST device",
1997  PI_ERROR_INVALID_DEVICE);
1998 }
1999 template <>
2000 inline uint32_t
2001 get_device_info_host<ext::intel::info::device::gpu_hw_threads_per_eu>() {
2002  throw runtime_error(
2003  "Obtaining the HW threads count per EU is not supported on HOST device",
2004  PI_ERROR_INVALID_DEVICE);
2005 }
2006 template <>
2007 inline uint64_t
2008 get_device_info_host<ext::intel::info::device::max_mem_bandwidth>() {
2009  throw runtime_error(
2010  "Obtaining the maximum memory bandwidth is not supported on HOST device",
2011  PI_ERROR_INVALID_DEVICE);
2012 }
2013 template <>
2014 inline detail::uuid_type
2015 get_device_info_host<ext::intel::info::device::uuid>() {
2016  throw runtime_error(
2017  "Obtaining the device uuid is not supported on HOST device",
2018  PI_ERROR_INVALID_DEVICE);
2019 }
2020 
2021 // TODO: Remove with deprecated feature
2022 // device::get_info<info::device::ext_intel_pci_address>()
2023 template <>
2024 inline std::string get_device_info_host<info::device::ext_intel_pci_address>() {
2025  throw runtime_error(
2026  "Obtaining the PCI address is not supported on HOST device",
2027  PI_ERROR_INVALID_DEVICE);
2028 }
2029 // TODO: Remove with deprecated feature
2030 // device::get_info<info::device::ext_intel_gpu_eu_count>()
2031 template <>
2032 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_eu_count>() {
2033  throw runtime_error("Obtaining the EU count is not supported on HOST device",
2034  PI_ERROR_INVALID_DEVICE);
2035 }
2036 // TODO: Remove with deprecated feature
2037 // device::get_info<info::device::ext_intel_gpu_eu_simd_width>()
2038 template <>
2039 inline uint32_t
2040 get_device_info_host<info::device::ext_intel_gpu_eu_simd_width>() {
2041  throw runtime_error(
2042  "Obtaining the EU SIMD width is not supported on HOST device",
2043  PI_ERROR_INVALID_DEVICE);
2044 }
2045 // TODO: Remove with deprecated feature
2046 // device::get_info<info::device::ext_intel_gpu_slices>()
2047 template <>
2048 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_slices>() {
2049  throw runtime_error(
2050  "Obtaining the number of slices is not supported on HOST device",
2051  PI_ERROR_INVALID_DEVICE);
2052 }
2053 // TODO: Remove with deprecated feature
2054 // device::get_info<info::device::ext_intel_gpu_subslices_per_slice>()
2055 template <>
2056 inline uint32_t
2057 get_device_info_host<info::device::ext_intel_gpu_subslices_per_slice>() {
2058  throw runtime_error("Obtaining the number of subslices per slice is not "
2059  "supported on HOST device",
2060  PI_ERROR_INVALID_DEVICE);
2061 }
2062 // TODO: Remove with deprecated feature
2063 // device::get_info<info::device::ext_intel_gpu_eu_count_per_subslice>()
2064 template <>
2065 inline uint32_t
2066 get_device_info_host<info::device::ext_intel_gpu_eu_count_per_subslice>() {
2067  throw runtime_error(
2068  "Obtaining the EU count per subslice is not supported on HOST device",
2069  PI_ERROR_INVALID_DEVICE);
2070 }
2071 // TODO: Remove with deprecated feature
2072 // device::get_info<info::device::ext_intel_gpu_hw_threads_per_eu>()
2073 template <>
2074 inline uint32_t
2075 get_device_info_host<info::device::ext_intel_gpu_hw_threads_per_eu>() {
2076  throw runtime_error(
2077  "Obtaining the HW threads count per EU is not supported on HOST device",
2078  PI_ERROR_INVALID_DEVICE);
2079 }
2080 // TODO: Remove with deprecated feature
2081 // device::get_info<info::device::ext_intel_max_mem_bandwidth>()
2082 template <>
2083 inline uint64_t
2084 get_device_info_host<info::device::ext_intel_max_mem_bandwidth>() {
2085  throw runtime_error(
2086  "Obtaining the maximum memory bandwidth is not supported on HOST device",
2087  PI_ERROR_INVALID_DEVICE);
2088 }
2089 // TODO: Move to namespace ext::intel::info::device
2090 template <> inline bool get_device_info_host<info::device::ext_oneapi_srgb>() {
2091  return false;
2092 }
2093 
2094 // TODO: Remove with deprecated feature
2095 // device::get_info<info::device::ext_intel_device_info_uuid>()
2096 template <>
2097 inline detail::uuid_type
2098 get_device_info_host<info::device::ext_intel_device_info_uuid>() {
2099  throw runtime_error(
2100  "Obtaining the device uuid is not supported on HOST device",
2101  PI_ERROR_INVALID_DEVICE);
2102 }
2103 
2104 template <>
2105 inline uint64_t get_device_info_host<ext::intel::info::device::free_memory>() {
2106  throw runtime_error(
2107  "Obtaining the device free memory is not supported on HOST device",
2108  PI_ERROR_INVALID_DEVICE);
2109 }
2110 
2111 template <>
2112 inline uint32_t
2113 get_device_info_host<ext::intel::info::device::memory_clock_rate>() {
2114  throw runtime_error(
2115  "Obtaining the device memory clock rate is not supported on HOST device",
2116  PI_ERROR_INVALID_DEVICE);
2117 }
2118 
2119 template <>
2120 inline uint32_t
2121 get_device_info_host<ext::intel::info::device::memory_bus_width>() {
2122  throw runtime_error(
2123  "Obtaining the device memory bus width is not supported on HOST device",
2124  PI_ERROR_INVALID_DEVICE);
2125 }
2126 
2127 template <>
2128 inline int32_t
2129 get_device_info_host<ext::intel::info::device::max_compute_queue_indices>() {
2130  throw runtime_error(
2131  "Obtaining max compute queue indices is not supported on HOST device",
2132  PI_ERROR_INVALID_DEVICE);
2133 }
2134 
2135 template <>
2136 inline bool get_device_info_host<
2137  ext::codeplay::experimental::info::device::supports_fusion>() {
2138  // No support for fusion on the host device.
2139  return false;
2140 }
2141 
2142 template <>
2143 inline uint32_t get_device_info_host<
2144  ext::codeplay::experimental::info::device::max_registers_per_work_group>() {
2145  throw runtime_error("Obtaining the maximum number of available registers per "
2146  "work-group is not supported on HOST device",
2147  PI_ERROR_INVALID_DEVICE);
2148 }
2149 
2150 template <>
2151 inline uint32_t get_device_info_host<
2152  ext::oneapi::experimental::info::device::image_row_pitch_align>() {
2153  throw runtime_error("Obtaining image pitch alignment is not "
2154  "supported on HOST device",
2155  PI_ERROR_INVALID_DEVICE);
2156 }
2157 
2158 template <>
2159 inline uint32_t get_device_info_host<
2160  ext::oneapi::experimental::info::device::max_image_linear_row_pitch>() {
2161  throw runtime_error("Obtaining max image linear pitch is not "
2162  "supported on HOST device",
2163  PI_ERROR_INVALID_DEVICE);
2164 }
2165 
2166 template <>
2167 inline std::vector<ext::oneapi::experimental::matrix::combination>
2168 get_device_info_host<
2169  ext::oneapi::experimental::info::device::matrix_combinations>() {
2170  throw runtime_error("Obtaining matrix combinations is not "
2171  "supported on HOST device",
2172  PI_ERROR_INVALID_DEVICE);
2173 }
2174 
2175 template <>
2176 inline uint32_t get_device_info_host<
2177  ext::oneapi::experimental::info::device::max_image_linear_width>() {
2178  throw runtime_error("Obtaining max image linear width is not "
2179  "supported on HOST device",
2180  PI_ERROR_INVALID_DEVICE);
2181 }
2182 
2183 template <>
2184 inline uint32_t get_device_info_host<
2185  ext::oneapi::experimental::info::device::max_image_linear_height>() {
2186  throw runtime_error("Obtaining max image linear height is not "
2187  "supported on HOST device",
2188  PI_ERROR_INVALID_DEVICE);
2189 }
2190 
2191 template <>
2192 inline float get_device_info_host<
2193  ext::oneapi::experimental::info::device::mipmap_max_anisotropy>() {
2194  throw runtime_error("Bindless image mipmaps are not supported on HOST device",
2195  PI_ERROR_INVALID_DEVICE);
2196 }
2197 
2198 template <>
2199 inline std::vector<sycl::device> get_device_info_host<
2200  ext::oneapi::experimental::info::device::component_devices>() {
2201  throw runtime_error("Host devices cannot be component devices.",
2202  PI_ERROR_INVALID_DEVICE);
2203 }
2204 
2205 template <>
2206 inline sycl::device get_device_info_host<
2207  ext::oneapi::experimental::info::device::composite_device>() {
2208  throw runtime_error("Host devices cannot be composite devices.",
2209  PI_ERROR_INVALID_DEVICE);
2210 }
2211 
2212 } // namespace detail
2213 } // namespace _V1
2214 } // namespace sycl