DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_info.hpp
1 //==-------- device_info.hpp - SYCL device info methods --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 #include <detail/device_impl.hpp>
11 #include <detail/jit_compiler.hpp>
12 #include <detail/platform_impl.hpp>
13 #include <detail/platform_util.hpp>
14 #include <detail/plugin.hpp>
15 #include <detail/program_manager/program_manager.hpp>
16 #include <sycl/detail/defines.hpp>
17 #include <sycl/detail/os_util.hpp>
18 #include <sycl/detail/pi.hpp>
19 #include <sycl/device.hpp>
22 #include <sycl/feature_test.hpp>
23 #include <sycl/info/info_desc.hpp>
24 #include <sycl/memory_enums.hpp>
25 #include <sycl/platform.hpp>
26 
27 #include <chrono>
28 #include <sstream>
29 #include <thread>
30 
31 #include "split_string.hpp"
32 
33 namespace sycl {
34 inline namespace _V1 {
35 namespace detail {
36 
37 inline std::vector<info::fp_config> read_fp_bitfield(pi_device_fp_config bits) {
38  std::vector<info::fp_config> result;
39  if (bits & PI_FP_DENORM)
40  result.push_back(info::fp_config::denorm);
41  if (bits & PI_FP_INF_NAN)
42  result.push_back(info::fp_config::inf_nan);
43  if (bits & PI_FP_ROUND_TO_NEAREST)
44  result.push_back(info::fp_config::round_to_nearest);
45  if (bits & PI_FP_ROUND_TO_ZERO)
46  result.push_back(info::fp_config::round_to_zero);
47  if (bits & PI_FP_ROUND_TO_INF)
48  result.push_back(info::fp_config::round_to_inf);
49  if (bits & PI_FP_FMA)
50  result.push_back(info::fp_config::fma);
51  if (bits & PI_FP_SOFT_FLOAT)
52  result.push_back(info::fp_config::soft_float);
53  if (bits & PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
54  result.push_back(info::fp_config::correctly_rounded_divide_sqrt);
55  return result;
56 }
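// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// the decoded vector produced above is what users observe through fp_config
// queries, e.g.
//
//   auto Cfg = Dev.get_info<sycl::info::device::single_fp_config>();
//   bool HasFma = std::find(Cfg.begin(), Cfg.end(),
//                           sycl::info::fp_config::fma) != Cfg.end();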
57 
58 inline std::vector<info::partition_affinity_domain>
59 read_domain_bitfield(pi_device_affinity_domain bits) {
60  std::vector<info::partition_affinity_domain> result;
61  if (bits & PI_DEVICE_AFFINITY_DOMAIN_NUMA)
62  result.push_back(info::partition_affinity_domain::numa);
63  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE)
64  result.push_back(info::partition_affinity_domain::L4_cache);
65  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE)
66  result.push_back(info::partition_affinity_domain::L3_cache);
67  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE)
68  result.push_back(info::partition_affinity_domain::L2_cache);
69  if (bits & PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE)
70  result.push_back(info::partition_affinity_domain::L1_cache);
71  if (bits & PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE)
72  result.push_back(info::partition_affinity_domain::next_partitionable);
73  return result;
74 }
75 
76 inline std::vector<info::execution_capability>
77 read_execution_bitfield(pi_device_exec_capabilities bits) {
78  std::vector<info::execution_capability> result;
79  if (bits & PI_EXEC_KERNEL)
80  result.push_back(info::execution_capability::exec_kernel);
81  if (bits & PI_EXEC_NATIVE_KERNEL)
82  result.push_back(info::execution_capability::exec_native_kernel);
83  return result;
84 }
85 
86 inline std::string
87 affinityDomainToString(info::partition_affinity_domain AffinityDomain) {
88  switch (AffinityDomain) {
89 #define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
90  case DOMAIN: \
91  return #DOMAIN;
92 
93  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
94  sycl::info::partition_affinity_domain::numa)
95  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
96  sycl::info::partition_affinity_domain::L4_cache)
97  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
98  sycl::info::partition_affinity_domain::L3_cache)
99  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
100  sycl::info::partition_affinity_domain::L2_cache)
101  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
102  sycl::info::partition_affinity_domain::L1_cache)
103  __SYCL_AFFINITY_DOMAIN_STRING_CASE(
104  sycl::info::partition_affinity_domain::next_partitionable)
105 #undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
106  default:
107  assert(false && "Missing case for affinity domain.");
108  return "unknown";
109  }
110 }
111 
112 // Mapping expected SYCL return types to those returned by PI calls
113 template <typename T> struct sycl_to_pi {
114  using type = T;
115 };
116 template <> struct sycl_to_pi<bool> {
117  using type = pi_bool;
118 };
119 template <> struct sycl_to_pi<device> {
120  using type = sycl::detail::pi::PiDevice;
121 };
122 template <> struct sycl_to_pi<platform> {
123  using type = sycl::detail::pi::PiPlatform;
124 };
125 
126 // Mapping fp_config device info types to the values used to check fp support
127 template <typename Param> struct check_fp_support {};
128 
129 template <> struct check_fp_support<info::device::half_fp_config> {
130  using type = info::device::native_vector_width_half;
131 };
132 
133 template <> struct check_fp_support<info::device::double_fp_config> {
134  using type = info::device::native_vector_width_double;
135 };
136 
137 // Structs for emulating function template partial specialization
138 // Default template for the general case
139 // TODO: get rid of remaining uses of OpenCL directly
140 //
141 template <typename ReturnT, typename Param> struct get_device_info_impl {
142  static ReturnT get(const DeviceImplPtr &Dev) {
143  typename sycl_to_pi<ReturnT>::type result;
144  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
145  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
146  nullptr);
147  return ReturnT(result);
148  }
149 };
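// Illustrative sketch (not part of this header): user code never names
// get_device_info_impl directly; device::get_info forwards the descriptor type
// to the matching specialization, e.g.
//
//   sycl::device Dev;  // default-selected device
//   auto Name = Dev.get_info<sycl::info::device::name>();
//   auto Units = Dev.get_info<sycl::info::device::max_compute_units>();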
150 
151 // Specialization for platform
152 template <typename Param> struct get_device_info_impl<platform, Param> {
153  static platform get(const DeviceImplPtr &Dev) {
154  typename sycl_to_pi<platform>::type result;
155  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
156  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
157  nullptr);
158  // TODO: Change PiDevice to device_impl.
159  // Use the Plugin from the device_impl class after plugin details
160  // are added to the class.
161  return createSyclObjFromImpl<platform>(
162  platform_impl::getOrMakePlatformImpl(result, Dev->getPlugin()));
163  }
164 };
165 
166 // Helper function to allow using the specialization of get_device_info_impl
167 // for string return type in other specializations.
168 inline std::string device_impl::get_device_info_string(
169  sycl::detail::pi::PiDeviceInfo InfoCode) const {
170  size_t resultSize = 0;
171  getPlugin()->call<PiApiKind::piDeviceGetInfo>(getHandleRef(), InfoCode, 0,
172  nullptr, &resultSize);
173  if (resultSize == 0) {
174  return std::string();
175  }
176  std::unique_ptr<char[]> result(new char[resultSize]);
177  getPlugin()->call<PiApiKind::piDeviceGetInfo>(
178  getHandleRef(), InfoCode, resultSize, result.get(), nullptr);
179 
180  return std::string(result.get());
181 }
182 
183 // Specialization for string return type, variable return size
184 template <typename Param> struct get_device_info_impl<std::string, Param> {
185  static std::string get(const DeviceImplPtr &Dev) {
186  return Dev->get_device_info_string(PiInfoCode<Param>::value);
187  }
188 };
189 
190 // Specialization for parent device
191 template <typename ReturnT>
192 struct get_device_info_impl<ReturnT, info::device::parent_device> {
193  static ReturnT get(const DeviceImplPtr &Dev);
194 };
195 
196 // Specialization for fp_config types, checks the corresponding fp type support
197 template <typename Param>
198 struct get_device_info_impl<std::vector<info::fp_config>, Param> {
199  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
200  // Check if fp type is supported
201  if (!get_device_info_impl<
202  typename check_fp_support<Param>::type::return_type,
203  typename check_fp_support<Param>::type>::get(Dev)) {
204  return {};
205  }
206  cl_device_fp_config result;
207  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
208  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
209  nullptr);
210  return read_fp_bitfield(result);
211  }
212 };
213 
214 // Specialization for device version
215 template <> struct get_device_info_impl<std::string, info::device::version> {
216  static std::string get(const DeviceImplPtr &Dev) {
217  return Dev->get_device_info_string(
218  PiInfoCode<info::device::version>::value);
219  }
220 };
221 
222 // Specialization for single_fp_config, no type support check required
223 template <>
224 struct get_device_info_impl<std::vector<info::fp_config>,
225  info::device::single_fp_config> {
226  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
227  pi_device_fp_config result;
228  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
229  Dev->getHandleRef(), PiInfoCode<info::device::single_fp_config>::value,
230  sizeof(result), &result, nullptr);
231  return read_fp_bitfield(result);
232  }
233 };
234 
235 // Specialization for queue_profiling. In addition to pi_queue level profiling,
236 // if piGetDeviceAndHostTimer is not supported, command_submit, command_start
237 // and command_end will be calculated. See MFallbackProfiling.
238 template <> struct get_device_info_impl<bool, info::device::queue_profiling> {
239  static bool get(const DeviceImplPtr &Dev) {
240  pi_queue_properties Properties;
241  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
242  Dev->getHandleRef(), PiInfoCode<info::device::queue_profiling>::value,
243  sizeof(Properties), &Properties, nullptr);
244  return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE;
245  }
246 };
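// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// this query is what gates profiling-enabled queues, e.g.
//
//   if (Dev.get_info<sycl::info::device::queue_profiling>()) {
//     sycl::queue Q{Dev, sycl::property::queue::enable_profiling{}};
//     auto E = Q.single_task([] {});
//     auto EndNs = E.get_profiling_info<
//         sycl::info::event_profiling::command_end>();
//   }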
247 
248 // Specialization for atomic_memory_order_capabilities, PI returns a bitfield
249 template <>
250 struct get_device_info_impl<std::vector<memory_order>,
251  info::device::atomic_memory_order_capabilities> {
252  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
253  pi_memory_order_capabilities result;
254  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
255  Dev->getHandleRef(),
256  PiInfoCode<info::device::atomic_memory_order_capabilities>::value,
257  sizeof(pi_memory_order_capabilities), &result, nullptr);
258  return readMemoryOrderBitfield(result);
259  }
260 };
261 
262 // Specialization for atomic_fence_order_capabilities, PI returns a bitfield
263 template <>
264 struct get_device_info_impl<std::vector<memory_order>,
265  info::device::atomic_fence_order_capabilities> {
266  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
267  pi_memory_order_capabilities result;
268  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
269  Dev->getHandleRef(),
270  PiInfoCode<info::device::atomic_fence_order_capabilities>::value,
271  sizeof(pi_memory_order_capabilities), &result, nullptr);
272  return readMemoryOrderBitfield(result);
273  }
274 };
275 
276 // Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
277 template <>
278 struct get_device_info_impl<std::vector<memory_scope>,
279  info::device::atomic_memory_scope_capabilities> {
280  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
281  pi_memory_scope_capabilities result;
282  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
283  Dev->getHandleRef(),
284  PiInfoCode<info::device::atomic_memory_scope_capabilities>::value,
285  sizeof(pi_memory_scope_capabilities), &result, nullptr);
286  return readMemoryScopeBitfield(result);
287  }
288 };
289 
290 // Specialization for atomic_fence_scope_capabilities, PI returns a bitfield
291 template <>
292 struct get_device_info_impl<std::vector<memory_scope>,
293  info::device::atomic_fence_scope_capabilities> {
294  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
295  pi_memory_scope_capabilities result;
296  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
297  Dev->getHandleRef(),
298  PiInfoCode<info::device::atomic_fence_scope_capabilities>::value,
299  sizeof(pi_memory_scope_capabilities), &result, nullptr);
300  return readMemoryScopeBitfield(result);
301  }
302 };
303 
304 // Specialization for cuda cluster group
305 template <>
306 struct get_device_info_impl<bool, info::device::ext_oneapi_cuda_cluster_group> {
307  static bool get(const DeviceImplPtr &Dev) {
308  bool result = false;
309  if (Dev->getBackend() == backend::ext_oneapi_cuda) {
310  auto Err =
311  Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
312  Dev->getHandleRef(),
313  PiInfoCode<info::device::ext_oneapi_cuda_cluster_group>::value,
314  sizeof(result), &result, nullptr);
315  if (Err != PI_SUCCESS) {
316  return false;
317  }
318  }
319  return result;
320  }
321 };
322 
323 // Specialization for exec_capabilities, OpenCL returns a bitfield
324 template <>
325 struct get_device_info_impl<std::vector<info::execution_capability>,
326  info::device::execution_capabilities> {
327  static std::vector<info::execution_capability> get(const DeviceImplPtr &Dev) {
328  pi_device_exec_capabilities result;
329  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
330  Dev->getHandleRef(),
331  PiInfoCode<info::device::execution_capabilities>::value, sizeof(result),
332  &result, nullptr);
333  return read_execution_bitfield(result);
334  }
335 };
336 
337 // Specialization for built in kernel identifiers
338 template <>
339 struct get_device_info_impl<std::vector<kernel_id>,
340  info::device::built_in_kernel_ids> {
341  static std::vector<kernel_id> get(const DeviceImplPtr &Dev) {
342  std::string result = Dev->get_device_info_string(
343  PiInfoCode<info::device::built_in_kernel_ids>::value);
344  auto names = split_string(result, ';');
345 
346  std::vector<kernel_id> ids;
347  ids.reserve(names.size());
348  for (const auto &name : names) {
349  ids.push_back(ProgramManager::getInstance().getBuiltInKernelID(name));
350  }
351  return ids;
352  }
353 };
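// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// the kernel_id list returned above can be inspected directly, e.g.
//
//   auto Ids = Dev.get_info<sycl::info::device::built_in_kernel_ids>();
//   for (const sycl::kernel_id &Id : Ids)
//     std::cout << Id.get_name() << '\n';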
354 
355 // Specialization for built in kernels, splits the string returned by OpenCL
356 template <>
357 struct get_device_info_impl<std::vector<std::string>,
358  info::device::built_in_kernels> {
359  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
360  std::string result = Dev->get_device_info_string(
361  PiInfoCode<info::device::built_in_kernels>::value);
362  return split_string(result, ';');
363  }
364 };
365 
366 // Specialization for extensions, splits the string returned by OpenCL
367 template <>
368 struct get_device_info_impl<std::vector<std::string>,
369  info::device::extensions> {
370  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
371  std::string result =
372  Dev->get_device_info_string(PiInfoCode<info::device::extensions>::value);
373  return split_string(result, ' ');
374  }
375 };
376 
377 static bool is_sycl_partition_property(info::partition_property PP) {
378  switch (PP) {
379  case info::partition_property::no_partition:
380  case info::partition_property::partition_equally:
381  case info::partition_property::partition_by_counts:
382  case info::partition_property::partition_by_affinity_domain:
383  case info::partition_property::ext_intel_partition_by_cslice:
384  return true;
385  }
386  return false;
387 }
388 
389 // Specialization for partition properties, variable OpenCL return size
390 template <>
391 struct get_device_info_impl<std::vector<info::partition_property>,
392  info::device::partition_properties> {
393  static std::vector<info::partition_property> get(const DeviceImplPtr &Dev) {
394  auto info_partition = PiInfoCode<info::device::partition_properties>::value;
395  const auto &Plugin = Dev->getPlugin();
396 
397  size_t resultSize;
398  Plugin->call<PiApiKind::piDeviceGetInfo>(
399  Dev->getHandleRef(), info_partition, 0, nullptr, &resultSize);
400 
401  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
402  if (arrayLength == 0) {
403  return {};
404  }
405  std::unique_ptr<cl_device_partition_property[]> arrayResult(
406  new cl_device_partition_property[arrayLength]);
407  Plugin->call<PiApiKind::piDeviceGetInfo>(Dev->getHandleRef(),
408  info_partition, resultSize,
409  arrayResult.get(), nullptr);
410 
411  std::vector<info::partition_property> result;
412  for (size_t i = 0; i < arrayLength; ++i) {
413  // OpenCL extensions may have partition_properties that
414  // are not yet defined for SYCL (eg. CL_DEVICE_PARTITION_BY_NAMES_INTEL)
415  info::partition_property pp(
416  static_cast<info::partition_property>(arrayResult[i]));
417  if (is_sycl_partition_property(pp))
418  result.push_back(pp);
419  }
420  return result;
421  }
422 };
423 
424 // Specialization for partition affinity domains, OpenCL returns a bitfield
425 template <>
426 struct get_device_info_impl<std::vector<info::partition_affinity_domain>,
427  info::device::partition_affinity_domains> {
428  static std::vector<info::partition_affinity_domain>
429  get(const DeviceImplPtr &Dev) {
430  pi_device_affinity_domain result;
431  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
432  Dev->getHandleRef(),
433  PiInfoCode<info::device::partition_affinity_domains>::value,
434  sizeof(result), &result, nullptr);
435  return read_domain_bitfield(result);
436  }
437 };
438 
439 // Specialization for partition type affinity domain, OpenCL can return other
440 // partition properties instead
441 template <>
442 struct get_device_info_impl<info::partition_affinity_domain,
443  info::device::partition_type_affinity_domain> {
444  static info::partition_affinity_domain get(const DeviceImplPtr &Dev) {
445  size_t resultSize;
446  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
447  Dev->getHandleRef(),
448  PiInfoCode<info::device::partition_type_affinity_domain>::value, 0,
449  nullptr, &resultSize);
450  if (resultSize != 1) {
451  return info::partition_affinity_domain::not_applicable;
452  }
453  cl_device_partition_property result;
454  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
455  Dev->getHandleRef(),
456  PiInfoCode<info::device::partition_type_affinity_domain>::value,
457  sizeof(result), &result, nullptr);
458  if (result == PI_DEVICE_AFFINITY_DOMAIN_NUMA ||
459  result == PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
460  result == PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
461  result == PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
462  result == PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE) {
463  return info::partition_affinity_domain(result);
464  }
465 
466  return info::partition_affinity_domain::not_applicable;
467  }
468 };
469 
470 // Specialization for partition type
471 template <>
472 struct get_device_info_impl<info::partition_property,
473  info::device::partition_type_property> {
474  static info::partition_property get(const DeviceImplPtr &Dev) {
475  size_t resultSize;
476  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
477  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, 0, nullptr,
478  &resultSize);
479  if (!resultSize)
480  return info::partition_property::no_partition;
481 
482  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
483 
484  std::unique_ptr<cl_device_partition_property[]> arrayResult(
485  new cl_device_partition_property[arrayLength]);
486  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
487  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, resultSize,
488  arrayResult.get(), nullptr);
489  if (!arrayResult[0])
490  return info::partition_property::no_partition;
491  return info::partition_property(arrayResult[0]);
492  }
493 };
494 // Specialization for supported subgroup sizes
495 template <>
496 struct get_device_info_impl<std::vector<size_t>,
497  info::device::sub_group_sizes> {
498  static std::vector<size_t> get(const DeviceImplPtr &Dev) {
499  size_t resultSize = 0;
500  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
501  Dev->getHandleRef(), PiInfoCode<info::device::sub_group_sizes>::value,
502  0, nullptr, &resultSize);
503 
504  std::vector<uint32_t> result32(resultSize / sizeof(uint32_t));
505  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
506  Dev->getHandleRef(), PiInfoCode<info::device::sub_group_sizes>::value,
507  resultSize, result32.data(), nullptr);
508 
509  std::vector<size_t> result;
510  result.reserve(result32.size());
511  for (uint32_t value : result32) {
512  result.push_back(value);
513  }
514  return result;
515  }
516 };
517 
518 // Specialization for kernel to kernel pipes.
519 // Here we step away from OpenCL, since there is no appropriate cl_device_info
520 // enum for global pipes feature.
521 template <>
522 struct get_device_info_impl<bool, info::device::kernel_kernel_pipe_support> {
523  static bool get(const DeviceImplPtr &Dev) {
524  // We claim that all Intel FPGA devices support the kernel-to-kernel pipe
525  // feature (at least at the scope of the SYCL_INTEL_data_flow_pipes extension).
526  platform plt =
527  createSyclObjFromImpl<platform>(Dev->getPlatformImpl());
528  std::string platform_name = plt.get_info<info::platform::name>();
529  if (platform_name == "Intel(R) FPGA Emulation Platform for OpenCL(TM)" ||
530  platform_name == "Intel(R) FPGA SDK for OpenCL(TM)")
531  return true;
532 
533  // TODO: a better way is to query for supported SPIR-V capabilities once
534  // that becomes possible. Also, if a device's backend supports
535  // SPIR-V 1.1 (where the Pipe Storage feature was defined), then it supports
536  // the feature as well.
537  return false;
538  }
539 };
540 
541 template <int Dimensions>
542 range<Dimensions> construct_range(size_t *values) = delete;
543 // Due to the flipping of work group dimensions before kernel launch, the values
544 // should also be reversed.
545 template <> inline range<1> construct_range<1>(size_t *values) {
546  return {values[0]};
547 }
548 template <> inline range<2> construct_range<2>(size_t *values) {
549  return {values[1], values[0]};
550 }
551 template <> inline range<3> construct_range<3>(size_t *values) {
552  return {values[2], values[1], values[0]};
553 }
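// Worked example (not part of this header): with the backend reporting
// values[] = {1024, 512, 256}, the reversal above yields
//
//   size_t v[3] = {1024, 512, 256};
//   range<3> r = construct_range<3>(v);  // r == {256, 512, 1024}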
554 
555 // Specialization for max_work_item_sizes.
556 template <int Dimensions>
557 struct get_device_info_impl<range<Dimensions>,
558  info::device::max_work_item_sizes<Dimensions>> {
559  static range<Dimensions> get(const DeviceImplPtr &Dev) {
560  size_t result[3];
561  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
562  Dev->getHandleRef(),
563  PiInfoCode<info::device::max_work_item_sizes<Dimensions>>::value,
564  sizeof(result), &result, nullptr);
565  return construct_range<Dimensions>(result);
566  }
567 };
568 
569 using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture;
570 
571 // Only for NVIDIA and AMD GPU architectures
572 constexpr std::pair<const char *, oneapi_exp_arch> NvidiaAmdGPUArchitectures[] =
573  {
574  {"5.0", oneapi_exp_arch::nvidia_gpu_sm_50},
575  {"5.2", oneapi_exp_arch::nvidia_gpu_sm_52},
576  {"5.3", oneapi_exp_arch::nvidia_gpu_sm_53},
577  {"6.0", oneapi_exp_arch::nvidia_gpu_sm_60},
578  {"6.1", oneapi_exp_arch::nvidia_gpu_sm_61},
579  {"6.2", oneapi_exp_arch::nvidia_gpu_sm_62},
580  {"7.0", oneapi_exp_arch::nvidia_gpu_sm_70},
581  {"7.2", oneapi_exp_arch::nvidia_gpu_sm_72},
582  {"7.5", oneapi_exp_arch::nvidia_gpu_sm_75},
583  {"8.0", oneapi_exp_arch::nvidia_gpu_sm_80},
584  {"8.6", oneapi_exp_arch::nvidia_gpu_sm_86},
585  {"8.7", oneapi_exp_arch::nvidia_gpu_sm_87},
586  {"8.9", oneapi_exp_arch::nvidia_gpu_sm_89},
587  {"9.0", oneapi_exp_arch::nvidia_gpu_sm_90},
588  {"gfx701", oneapi_exp_arch::amd_gpu_gfx701},
589  {"gfx702", oneapi_exp_arch::amd_gpu_gfx702},
590  {"gfx801", oneapi_exp_arch::amd_gpu_gfx801},
591  {"gfx802", oneapi_exp_arch::amd_gpu_gfx802},
592  {"gfx803", oneapi_exp_arch::amd_gpu_gfx803},
593  {"gfx805", oneapi_exp_arch::amd_gpu_gfx805},
594  {"gfx810", oneapi_exp_arch::amd_gpu_gfx810},
595  {"gfx900", oneapi_exp_arch::amd_gpu_gfx900},
596  {"gfx902", oneapi_exp_arch::amd_gpu_gfx902},
597  {"gfx904", oneapi_exp_arch::amd_gpu_gfx904},
598  {"gfx906", oneapi_exp_arch::amd_gpu_gfx906},
599  {"gfx908", oneapi_exp_arch::amd_gpu_gfx908},
600  {"gfx909", oneapi_exp_arch::amd_gpu_gfx909},
601  {"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a},
602  {"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c},
603  {"gfx940", oneapi_exp_arch::amd_gpu_gfx940},
604  {"gfx941", oneapi_exp_arch::amd_gpu_gfx941},
605  {"gfx942", oneapi_exp_arch::amd_gpu_gfx942},
606  {"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010},
607  {"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011},
608  {"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012},
609  {"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013},
610  {"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030},
611  {"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031},
612  {"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032},
613  {"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033},
614  {"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034},
615  {"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035},
616  {"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036},
617  {"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100},
618  {"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101},
619  {"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102},
620  {"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103},
621  {"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150},
622  {"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151},
623  {"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200},
624  {"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201},
625 };
626 
627 // Only for Intel GPU architectures
628 constexpr std::pair<const int, oneapi_exp_arch> IntelGPUArchitectures[] = {
629  {0x02000000, oneapi_exp_arch::intel_gpu_bdw},
630  {0x02400009, oneapi_exp_arch::intel_gpu_skl},
631  {0x02404009, oneapi_exp_arch::intel_gpu_kbl},
632  {0x02408009, oneapi_exp_arch::intel_gpu_cfl},
633  {0x0240c000, oneapi_exp_arch::intel_gpu_apl},
634  {0x02410000, oneapi_exp_arch::intel_gpu_glk},
635  {0x02414000, oneapi_exp_arch::intel_gpu_whl},
636  {0x02418000, oneapi_exp_arch::intel_gpu_aml},
637  {0x0241c000, oneapi_exp_arch::intel_gpu_cml},
638  {0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
639  {0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
640  {0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
641  {0x03004000, oneapi_exp_arch::intel_gpu_rkl},
642  {0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
643  {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
644  {0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
645  {0x03028000, oneapi_exp_arch::intel_gpu_dg1},
646  {0x030dc000, oneapi_exp_arch::intel_gpu_acm_g10}, // A0
647  {0x030dc001, oneapi_exp_arch::intel_gpu_acm_g10}, // A1
648  {0x030dc004, oneapi_exp_arch::intel_gpu_acm_g10}, // B0
649  {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10}, // C0
650  {0x030e0000, oneapi_exp_arch::intel_gpu_acm_g11}, // A0
651  {0x030e0004, oneapi_exp_arch::intel_gpu_acm_g11}, // B0
652  {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11}, // B1
653  {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12}, // A0
654  {0x030f0000, oneapi_exp_arch::intel_gpu_pvc}, // XL-A0
655  {0x030f0001, oneapi_exp_arch::intel_gpu_pvc}, // XL-AOP
656  {0x030f0003, oneapi_exp_arch::intel_gpu_pvc}, // XT-A0
657  {0x030f0005, oneapi_exp_arch::intel_gpu_pvc}, // XT-B0
658  {0x030f0006, oneapi_exp_arch::intel_gpu_pvc}, // XT-B1
659  {0x030f0007, oneapi_exp_arch::intel_gpu_pvc}, // XT-C0
660  {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg}, // C0
661  {0x03118000, oneapi_exp_arch::intel_gpu_mtl_u}, // A0
662  {0x03118004, oneapi_exp_arch::intel_gpu_mtl_u}, // B0
663  {0x0311c000, oneapi_exp_arch::intel_gpu_mtl_h}, // A0
664  {0x0311c004, oneapi_exp_arch::intel_gpu_mtl_h}, // B0
665  {0x03128000, oneapi_exp_arch::intel_gpu_arl_h}, // A0
666  {0x03128004, oneapi_exp_arch::intel_gpu_arl_h}, // B0
667  {0x05004000, oneapi_exp_arch::intel_gpu_bmg_g21}, // A0
668  {0x05004001, oneapi_exp_arch::intel_gpu_bmg_g21}, // A1
669  {0x05004004, oneapi_exp_arch::intel_gpu_bmg_g21}, // B0
670  {0x05010000, oneapi_exp_arch::intel_gpu_lnl_m}, // A0
671  {0x05010001, oneapi_exp_arch::intel_gpu_lnl_m}, // A1
672  {0x05010004, oneapi_exp_arch::intel_gpu_lnl_m}, // B0
673 };
674 
675 // Only for Intel CPU architectures
676 constexpr std::pair<const int, oneapi_exp_arch> IntelCPUArchitectures[] = {
677  {8, oneapi_exp_arch::intel_cpu_spr},
678  {9, oneapi_exp_arch::intel_cpu_gnr},
679 };
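// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// these tables back the sycl_ext_oneapi_device_architecture query, e.g.
//
//   namespace syclex = sycl::ext::oneapi::experimental;
//   auto Arch = Dev.get_info<syclex::info::device::architecture>();
//   if (Arch == syclex::architecture::intel_gpu_pvc) { /* PVC-only path */ }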
680 
681 template <>
682 struct get_device_info_impl<
683  ext::oneapi::experimental::architecture,
684  ext::oneapi::experimental::info::device::architecture> {
685  static ext::oneapi::experimental::architecture get(const DeviceImplPtr &Dev) {
686  backend CurrentBackend = Dev->getBackend();
687  if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
688  backend::opencl == CurrentBackend)) {
689  auto MapArchIDToArchName = [](const int arch) {
690  for (const auto &Item : IntelGPUArchitectures) {
691  if (Item.first == arch)
692  return Item.second;
693  }
694  return ext::oneapi::experimental::architecture::unknown;
695  };
696  uint32_t DeviceIp;
697  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
698  Dev->getHandleRef(),
699  PiInfoCode<
700  ext::oneapi::experimental::info::device::architecture>::value,
701  sizeof(DeviceIp), &DeviceIp, nullptr);
702  return MapArchIDToArchName(DeviceIp);
703  } else if (Dev->is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend ||
704  backend::ext_oneapi_hip == CurrentBackend)) {
705  auto MapArchIDToArchName = [](const char *arch) {
706  for (const auto &Item : NvidiaAmdGPUArchitectures) {
707  if (std::string_view(Item.first) == arch)
708  return Item.second;
709  }
710  return ext::oneapi::experimental::architecture::unknown;
711  };
712  size_t ResultSize = 0;
713  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
714  Dev->getHandleRef(), PiInfoCode<info::device::version>::value, 0,
715  nullptr, &ResultSize);
716  std::unique_ptr<char[]> DeviceArch(new char[ResultSize]);
717  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
718  Dev->getHandleRef(), PiInfoCode<info::device::version>::value,
719  ResultSize, DeviceArch.get(), nullptr);
720  std::string DeviceArchCopy(DeviceArch.get());
721  std::string DeviceArchSubstr =
722  DeviceArchCopy.substr(0, DeviceArchCopy.find(":"));
723  return MapArchIDToArchName(DeviceArchSubstr.data());
724  } else if (Dev->is_cpu() && backend::opencl == CurrentBackend) {
725  auto MapArchIDToArchName = [](const int arch) {
726  for (const auto &Item : IntelCPUArchitectures) {
727  if (Item.first == arch)
728  return Item.second;
729  }
730  return sycl::ext::oneapi::experimental::architecture::x86_64;
731  };
732  uint32_t DeviceIp;
733  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
734  Dev->getHandleRef(),
735  PiInfoCode<
736  ext::oneapi::experimental::info::device::architecture>::value,
737  sizeof(DeviceIp), &DeviceIp, nullptr);
738  return MapArchIDToArchName(DeviceIp);
739  } // else is not needed
740  // TODO: add support of other architectures by extending with else if
741  return ext::oneapi::experimental::architecture::unknown;
742  }
743 };
744 
745 template <>
746 struct get_device_info_impl<
747  std::vector<ext::oneapi::experimental::matrix::combination>,
748  ext::oneapi::experimental::info::device::matrix_combinations> {
749  static std::vector<ext::oneapi::experimental::matrix::combination>
750  get(const DeviceImplPtr &Dev) {
751  using namespace ext::oneapi::experimental::matrix;
752  using namespace ext::oneapi::experimental;
753  backend CurrentBackend = Dev->getBackend();
754  auto get_current_architecture = [&Dev]() -> std::optional<architecture> {
755  // This helper lambda ignores all runtime-related exceptions from
756  // querying the device architecture. For instance, if the device
757  // architecture on the user's machine is not supported by
758  // sycl_ext_oneapi_device_architecture, the runtime exception is swallowed
759  // and std::nullopt is returned.
760  try {
761  return get_device_info_impl<
762  architecture,
763  ext::oneapi::experimental::info::device::architecture>::get(Dev);
764  } catch (sycl::exception &e) {
765  if (e.code() != errc::runtime)
766  std::rethrow_exception(std::make_exception_ptr(e));
767  }
768  return std::nullopt;
769  };
770  std::optional<architecture> DeviceArchOpt = get_current_architecture();
771  if (!DeviceArchOpt.has_value())
772  return {};
773  architecture DeviceArch = DeviceArchOpt.value();
774  if (architecture::intel_cpu_spr == DeviceArch)
775  return {
776  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
777  matrix_type::sint32, matrix_type::sint32},
778  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
779  matrix_type::sint32, matrix_type::sint32},
780  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
781  matrix_type::sint32, matrix_type::sint32},
782  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
783  matrix_type::sint32, matrix_type::sint32},
784  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
785  matrix_type::fp32, matrix_type::fp32},
786  };
787  else if (architecture::intel_cpu_gnr == DeviceArch)
788  return {
789  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
790  matrix_type::sint32, matrix_type::sint32},
791  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
792  matrix_type::sint32, matrix_type::sint32},
793  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
794  matrix_type::sint32, matrix_type::sint32},
795  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
796  matrix_type::sint32, matrix_type::sint32},
797  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
798  matrix_type::fp32, matrix_type::fp32},
799  {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
800  matrix_type::fp32, matrix_type::fp32},
801  };
802  else if (architecture::intel_gpu_pvc == DeviceArch)
803  return {
804  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
805  matrix_type::sint32, matrix_type::sint32},
806  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::sint8,
807  matrix_type::sint32, matrix_type::sint32},
808  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::uint8,
809  matrix_type::sint32, matrix_type::sint32},
810  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::sint8,
811  matrix_type::sint32, matrix_type::sint32},
812  {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
813  matrix_type::fp32, matrix_type::fp32},
814  {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
815  matrix_type::fp32, matrix_type::fp32},
816  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
817  matrix_type::fp32, matrix_type::fp32},
818  {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
819  matrix_type::fp32, matrix_type::fp32},
820  {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
821  matrix_type::fp32, matrix_type::fp32},
822  {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
823  matrix_type::fp32, matrix_type::fp32},
824  };
825  else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
826  (architecture::intel_gpu_dg2_g11 == DeviceArch) ||
827  (architecture::intel_gpu_dg2_g12 == DeviceArch))
828  return {
829  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::uint8,
830  matrix_type::sint32, matrix_type::sint32},
831  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::sint8,
832  matrix_type::sint32, matrix_type::sint32},
833  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::uint8,
834  matrix_type::sint32, matrix_type::sint32},
835  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::sint8,
836  matrix_type::sint32, matrix_type::sint32},
837  {8, 0, 0, 0, 8, 16, matrix_type::fp16, matrix_type::fp16,
838  matrix_type::fp32, matrix_type::fp32},
839  {8, 0, 0, 0, 8, 16, matrix_type::bf16, matrix_type::bf16,
840  matrix_type::fp32, matrix_type::fp32},
841  };
842  else if (architecture::amd_gpu_gfx90a == DeviceArch)
843  return {
844  {0, 0, 0, 32, 32, 8, matrix_type::fp16, matrix_type::fp16,
845  matrix_type::fp32, matrix_type::fp32},
846  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
847  matrix_type::fp32, matrix_type::fp32},
848  {0, 0, 0, 32, 32, 8, matrix_type::sint8, matrix_type::sint8,
849  matrix_type::sint32, matrix_type::sint32},
850  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
851  matrix_type::sint32, matrix_type::sint32},
852  {0, 0, 0, 32, 32, 8, matrix_type::bf16, matrix_type::bf16,
853  matrix_type::fp32, matrix_type::fp32},
854  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
855  matrix_type::fp32, matrix_type::fp32},
856  {0, 0, 0, 16, 16, 4, matrix_type::fp64, matrix_type::fp64,
857  matrix_type::fp64, matrix_type::fp64},
858  };
859  else if (backend::ext_oneapi_cuda == CurrentBackend) {
860  // TODO: The following can be simplified once comparison of architectures
861  // using < and > is implemented.
863  constexpr std::pair<float, oneapi_exp_arch> NvidiaArchNumbs[] = {
864  {5.0, oneapi_exp_arch::nvidia_gpu_sm_50},
865  {5.2, oneapi_exp_arch::nvidia_gpu_sm_52},
866  {5.3, oneapi_exp_arch::nvidia_gpu_sm_53},
867  {6.0, oneapi_exp_arch::nvidia_gpu_sm_60},
868  {6.1, oneapi_exp_arch::nvidia_gpu_sm_61},
869  {6.2, oneapi_exp_arch::nvidia_gpu_sm_62},
870  {7.0, oneapi_exp_arch::nvidia_gpu_sm_70},
871  {7.2, oneapi_exp_arch::nvidia_gpu_sm_72},
872  {7.5, oneapi_exp_arch::nvidia_gpu_sm_75},
873  {8.0, oneapi_exp_arch::nvidia_gpu_sm_80},
874  {8.6, oneapi_exp_arch::nvidia_gpu_sm_86},
875  {8.7, oneapi_exp_arch::nvidia_gpu_sm_87},
876  {8.9, oneapi_exp_arch::nvidia_gpu_sm_89},
877  {9.0, oneapi_exp_arch::nvidia_gpu_sm_90},
878  };
879  auto GetArchNum = [&](const architecture &arch) {
880  for (const auto &Item : NvidiaArchNumbs)
881  if (Item.second == arch)
882  return Item.first;
883  return 0.f;
884  };
885  float ComputeCapability = GetArchNum(DeviceArch);
886  std::vector<combination> sm_70_combinations = {
887  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
888  matrix_type::fp32, matrix_type::fp32},
889  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
890  matrix_type::fp32, matrix_type::fp32},
891  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
892  matrix_type::fp32, matrix_type::fp32},
893  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
894  matrix_type::fp16, matrix_type::fp16},
895  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
896  matrix_type::fp16, matrix_type::fp16},
897  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
898  matrix_type::fp16, matrix_type::fp16},
899  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
900  matrix_type::fp32, matrix_type::fp16},
901  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
902  matrix_type::fp32, matrix_type::fp16},
903  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
904  matrix_type::fp32, matrix_type::fp16},
905  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
906  matrix_type::fp16, matrix_type::fp32},
907  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
908  matrix_type::fp16, matrix_type::fp32},
909  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
910  matrix_type::fp16, matrix_type::fp32}};
911  std::vector<combination> sm_72_combinations = {
912  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
913  matrix_type::sint32, matrix_type::sint32},
914  {0, 0, 0, 8, 32, 16, matrix_type::sint8, matrix_type::sint8,
915  matrix_type::sint32, matrix_type::sint32},
916  {0, 0, 0, 32, 8, 16, matrix_type::sint8, matrix_type::sint8,
917  matrix_type::sint32, matrix_type::sint32},
918  {0, 0, 0, 16, 16, 16, matrix_type::uint8, matrix_type::uint8,
919  matrix_type::sint32, matrix_type::sint32},
920  {0, 0, 0, 8, 32, 16, matrix_type::uint8, matrix_type::uint8,
921  matrix_type::sint32, matrix_type::sint32},
922  {0, 0, 0, 32, 8, 16, matrix_type::uint8, matrix_type::uint8,
923  matrix_type::sint32, matrix_type::sint32}};
924  std::vector<combination> sm_80_combinations = {
925  {0, 0, 0, 16, 16, 8, matrix_type::tf32, matrix_type::tf32,
926  matrix_type::fp32, matrix_type::fp32},
927  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
928  matrix_type::fp32, matrix_type::fp32},
929  {0, 0, 0, 8, 32, 16, matrix_type::bf16, matrix_type::bf16,
930  matrix_type::fp32, matrix_type::fp32},
931  {0, 0, 0, 32, 8, 16, matrix_type::bf16, matrix_type::bf16,
932  matrix_type::fp32, matrix_type::fp32},
933  {0, 0, 0, 8, 8, 4, matrix_type::fp64, matrix_type::fp64,
934  matrix_type::fp64, matrix_type::fp64}};
935  if (ComputeCapability >= 8.0) {
936  sm_80_combinations.insert(sm_80_combinations.end(),
937  sm_72_combinations.begin(),
938  sm_72_combinations.end());
939  sm_80_combinations.insert(sm_80_combinations.end(),
940  sm_70_combinations.begin(),
941  sm_70_combinations.end());
942  return sm_80_combinations;
943  } else if (ComputeCapability >= 7.2) {
944  sm_72_combinations.insert(sm_72_combinations.end(),
945  sm_70_combinations.begin(),
946  sm_70_combinations.end());
947  return sm_72_combinations;
948  } else if (ComputeCapability >= 7.0)
949  return sm_70_combinations;
950  }
951  return {};
952  }
953 };
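// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// the reported combinations describe which joint_matrix shapes and types the
// device supports, e.g.
//
//   namespace syclex = sycl::ext::oneapi::experimental;
//   auto Combos = Dev.get_info<syclex::info::device::matrix_combinations>();
//   bool HasJointMatrix = !Combos.empty();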
954 
955 template <>
956 struct get_device_info_impl<
957  size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
958  static size_t get(const DeviceImplPtr) {
959  return static_cast<size_t>((std::numeric_limits<int>::max)());
960  }
961 };
962 template <>
963 struct get_device_info_impl<
964  id<1>, ext::oneapi::experimental::info::device::max_work_groups<1>> {
965  static id<1> get(const DeviceImplPtr &Dev) {
966  size_t result[3];
967  size_t Limit =
968  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
969  max_global_work_groups>::get(Dev);
970  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
971  Dev->getHandleRef(),
972  PiInfoCode<
973  ext::oneapi::experimental::info::device::max_work_groups<1>>::value,
974  sizeof(result), &result, nullptr);
975  return id<1>(std::min(Limit, result[0]));
976  }
977 };
978 
979 template <>
980 struct get_device_info_impl<
981  id<2>, ext::oneapi::experimental::info::device::max_work_groups<2>> {
982  static id<2> get(const DeviceImplPtr &Dev) {
983  size_t result[3];
984  size_t Limit =
985  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
986  max_global_work_groups>::get(Dev);
987  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
988  Dev->getHandleRef(),
989  PiInfoCode<
990  ext::oneapi::experimental::info::device::max_work_groups<2>>::value,
991  sizeof(result), &result, nullptr);
992  return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0]));
993  }
994 };
995 
996 template <>
997 struct get_device_info_impl<
998  id<3>, ext::oneapi::experimental::info::device::max_work_groups<3>> {
999  static id<3> get(const DeviceImplPtr &Dev) {
1000  size_t result[3];
1001  size_t Limit =
1002  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
1003  max_global_work_groups>::get(Dev);
1004  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1005  Dev->getHandleRef(),
1006  PiInfoCode<
1007  ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
1008  sizeof(result), &result, nullptr);
1009  return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
1010  std::min(Limit, result[0]));
1011  }
1012 };
1013 
1014 // TODO: Remove with deprecated feature
1015 // device::get_info<info::device::ext_oneapi_max_global_work_groups>
1016 template <>
1017 struct get_device_info_impl<size_t,
1018  info::device::ext_oneapi_max_global_work_groups> {
1019  static size_t get(const DeviceImplPtr &Dev) {
1020  return get_device_info_impl<size_t,
1021  ext::oneapi::experimental::info::device::
1022  max_global_work_groups>::get(Dev);
1023  }
1024 };
1025 
1026 // TODO: Remove with deprecated feature
1027 // device::get_info<info::device::ext_oneapi_max_work_groups_1d>
1028 template <>
1029 struct get_device_info_impl<id<1>,
1030  info::device::ext_oneapi_max_work_groups_1d> {
1031  static id<1> get(const DeviceImplPtr &Dev) {
1032  return get_device_info_impl<
1033  id<1>,
1034  ext::oneapi::experimental::info::device::max_work_groups<1>>::get(Dev);
1035  }
1036 };
1037 
1038 // TODO: Remove with deprecated feature
1039 // device::get_info<info::device::ext_oneapi_max_work_groups_2d>
1040 template <>
1041 struct get_device_info_impl<id<2>,
1042  info::device::ext_oneapi_max_work_groups_2d> {
1043  static id<2> get(const DeviceImplPtr &Dev) {
1044  return get_device_info_impl<
1045  id<2>,
1046  ext::oneapi::experimental::info::device::max_work_groups<2>>::get(Dev);
1047  }
1048 };
1049 
1050 // TODO: Remove with deprecated feature
1051 // device::get_info<info::device::ext_oneapi_max_work_groups_3d>
1052 template <>
1053 struct get_device_info_impl<id<3>,
1054  info::device::ext_oneapi_max_work_groups_3d> {
1055  static id<3> get(const DeviceImplPtr &Dev) {
1056  return get_device_info_impl<
1057  id<3>,
1058  ext::oneapi::experimental::info::device::max_work_groups<3>>::get(Dev);
1059  }
1060 };
1061 
1062 // Specialization for parent device
1063 template <> struct get_device_info_impl<device, info::device::parent_device> {
1064  static device get(const DeviceImplPtr &Dev) {
1065  typename sycl_to_pi<device>::type result;
1066  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1067  Dev->getHandleRef(), PiInfoCode<info::device::parent_device>::value,
1068  sizeof(result), &result, nullptr);
1069  if (result == nullptr)
1070  throw exception(make_error_code(errc::invalid),
1071  "No parent for device because it is not a subdevice");
1072 
1073  const auto &Platform = Dev->getPlatformImpl();
1074  return createSyclObjFromImpl<device>(
1075  Platform->getOrMakeDeviceImpl(result, Platform));
1076  }
1077 };
1078 
1079 // Specialization for image_support
1080 template <> struct get_device_info_impl<bool, info::device::image_support> {
1081  static bool get(const DeviceImplPtr &) {
1082  // No devices currently support SYCL 2020 images.
1083  return false;
1084  }
1085 };
1086 
1087 // USM
1088 
1089 // Specialization for device usm query.
1090 template <>
1091 struct get_device_info_impl<bool, info::device::usm_device_allocations> {
1092  static bool get(const DeviceImplPtr &Dev) {
1093  pi_usm_capabilities caps;
1094  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1095  Dev->getHandleRef(),
1096  PiInfoCode<info::device::usm_device_allocations>::value,
1097  sizeof(pi_usm_capabilities), &caps, nullptr);
1098 
1099  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1100  }
1101 };
1102 
1103 // Specialization for host usm query.
1104 template <>
1105 struct get_device_info_impl<bool, info::device::usm_host_allocations> {
1106  static bool get(const DeviceImplPtr &Dev) {
1107  pi_usm_capabilities caps;
1108  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1109  Dev->getHandleRef(),
1110  PiInfoCode<info::device::usm_host_allocations>::value,
1111  sizeof(pi_usm_capabilities), &caps, nullptr);
1112 
1113  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1114  }
1115 };
1116 
1117 // Specialization for shared usm query.
1118 template <>
1119 struct get_device_info_impl<bool, info::device::usm_shared_allocations> {
1120  static bool get(const DeviceImplPtr &Dev) {
1121  pi_usm_capabilities caps;
1122  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1123  Dev->getHandleRef(),
1124  PiInfoCode<info::device::usm_shared_allocations>::value,
1125  sizeof(pi_usm_capabilities), &caps, nullptr);
1126  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1127  }
1128 };
1129 
1130 // Specialization for restricted usm query
1131 template <>
1132 struct get_device_info_impl<bool,
1133  info::device::usm_restricted_shared_allocations> {
1134  static bool get(const DeviceImplPtr &Dev) {
1135  pi_usm_capabilities caps;
1136  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1137  Dev->getHandleRef(),
1138  PiInfoCode<info::device::usm_restricted_shared_allocations>::value,
1139  sizeof(pi_usm_capabilities), &caps, nullptr);
1140  // Check that we don't support any cross device sharing
1141  return (Err != PI_SUCCESS)
1142  ? false
1143  : !(caps & (PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS));
1144  }
1145 };
1146 
1147 // Specialization for system usm query
1148 template <>
1149 struct get_device_info_impl<bool, info::device::usm_system_allocations> {
1150  static bool get(const DeviceImplPtr &Dev) {
1151  pi_usm_capabilities caps;
1152  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1153  Dev->getHandleRef(),
1154  PiInfoCode<info::device::usm_system_allocations>::value,
1155  sizeof(pi_usm_capabilities), &caps, nullptr);
1156  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1157  }
1158 };
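// Illustrative sketch (not part of this header; assumes a sycl::queue Q):
// the USM capability queries above map to the allocation functions a program
// may call, e.g.
//
//   sycl::device Dev = Q.get_device();
//   if (Dev.get_info<sycl::info::device::usm_device_allocations>()) {
//     void *P = sycl::malloc_device(1024, Q);
//     sycl::free(P, Q);
//   }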
1159 
1160 // Specialization for kernel fusion support
1161 template <>
1162 struct get_device_info_impl<
1163  bool, ext::codeplay::experimental::info::device::supports_fusion> {
1164  static bool get(const DeviceImplPtr &Dev) {
1165 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1166  // If the JIT library can't be loaded or entry points in the JIT library
1167  // can't be resolved, fusion is not available.
1168  if (!jit_compiler::get_instance().isAvailable()) {
1169  return false;
1170  }
1171  // Currently fusion is only supported for SPIR-V based backends,
1172  // CUDA and HIP.
1173  if (Dev->getBackend() == backend::opencl) {
1174  // Exclude all non-CPU or non-GPU devices on OpenCL, in particular
1175  // accelerators.
1176  return Dev->is_cpu() || Dev->is_gpu();
1177  }
1178 
1179  return (Dev->getBackend() == backend::ext_oneapi_level_zero) ||
1180  (Dev->getBackend() == backend::ext_oneapi_cuda) ||
1181  (Dev->getBackend() == backend::ext_oneapi_hip);
1182 #else // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1183  (void)Dev;
1184  return false;
1185 #endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1186  }
1187 };
1188 
1189 // Specialization for max registers per work-group
1190 template <>
1191 struct get_device_info_impl<
1192  uint32_t,
1193  ext::codeplay::experimental::info::device::max_registers_per_work_group> {
1194  static uint32_t get(const DeviceImplPtr &Dev) {
1195  uint32_t maxRegsPerWG;
1196  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1197  Dev->getHandleRef(),
1198  PiInfoCode<ext::codeplay::experimental::info::device::
1199  max_registers_per_work_group>::value,
1200  sizeof(maxRegsPerWG), &maxRegsPerWG, nullptr);
1201  return maxRegsPerWG;
1202  }
1203 };
1204 
1205 // Specialization for composite devices extension.
1206 template <>
1207 struct get_device_info_impl<
1208  std::vector<sycl::device>,
1209  ext::oneapi::experimental::info::device::component_devices> {
1210  static std::vector<sycl::device> get(const DeviceImplPtr &Dev) {
1211  size_t ResultSize = 0;
1212  // First call to get DevCount.
1213  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1214  Dev->getHandleRef(),
1215  PiInfoCode<
1216  ext::oneapi::experimental::info::device::component_devices>::value,
1217  0, nullptr, &ResultSize);
1218 
1219  // If the feature is unsupported or if the result was empty, return an empty
1220  // list of devices.
1221  if (Err == PI_ERROR_INVALID_VALUE || (Err == PI_SUCCESS && ResultSize == 0))
1222  return {};
1223 
1224  // Otherwise, if there was an error from PI it is unexpected and we should
1225  // handle it accordingly.
1226  Dev->getPlugin()->checkPiResult(Err);
1227 
1228  size_t DevCount = ResultSize / sizeof(pi_device);
1229  // Second call to get the list.
1230  std::vector<pi_device> Devs(DevCount);
1231  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1232  Dev->getHandleRef(),
1233  PiInfoCode<
1234  ext::oneapi::experimental::info::device::component_devices>::value,
1235  ResultSize, Devs.data(), nullptr);
1236  std::vector<sycl::device> Result;
1237  const auto &Platform = Dev->getPlatformImpl();
1238  for (const auto &d : Devs)
1239  Result.push_back(createSyclObjFromImpl<device>(
1240  Platform->getOrMakeDeviceImpl(d, Platform)));
1241 
1242  return Result;
1243  }
1244 };
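// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// component/composite queries from the sycl_ext_oneapi_composite_device
// extension are used together, e.g.
//
//   namespace syclex = sycl::ext::oneapi::experimental;
//   if (Dev.has(sycl::aspect::ext_oneapi_is_composite)) {
//     auto Components =
//         Dev.get_info<syclex::info::device::component_devices>();
//   }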
1245 template <>
1246 struct get_device_info_impl<
1247  sycl::device, ext::oneapi::experimental::info::device::composite_device> {
1248  static sycl::device get(const DeviceImplPtr &Dev) {
1249  if (!Dev->has(sycl::aspect::ext_oneapi_is_component))
1250  throw sycl::exception(make_error_code(errc::invalid),
1251  "Only devices with aspect::ext_oneapi_is_component "
1252  "can call this function.");
1253 
1254  typename sycl_to_pi<device>::type Result;
1255  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1256  Dev->getHandleRef(),
1257  PiInfoCode<
1258  ext::oneapi::experimental::info::device::composite_device>::value,
1259  sizeof(Result), &Result, nullptr);
1260 
1261  if (Result) {
1262  const auto &Platform = Dev->getPlatformImpl();
1263  return createSyclObjFromImpl<device>(
1264  Platform->getOrMakeDeviceImpl(Result, Platform));
1265  }
1266  throw sycl::exception(make_error_code(errc::invalid),
1267  "A component with aspect::ext_oneapi_is_component "
1268  "must have a composite device.");
1269  }
1270 };
1271 
1272 template <typename Param>
1273 typename Param::return_type get_device_info(const DeviceImplPtr &Dev) {
1274  static_assert(is_device_info_desc<Param>::value,
1275  "Invalid device information descriptor");
1276  if (std::is_same<Param,
1277  sycl::_V1::ext::intel::info::device::free_memory>::value) {
1278  if (!Dev->has(aspect::ext_intel_free_memory))
1279  throw exception(
1280  make_error_code(errc::feature_not_supported),
1281  "The device does not have the ext_intel_free_memory aspect");
1282  }
1283  return get_device_info_impl<typename Param::return_type, Param>::get(Dev);
1284 }
1285 
1286 // Returns the list of all progress guarantees that can be requested for
1287 // work_groups from the coordination level of root_group when using the device
1288 // given by Dev. First it calls getProgressGuarantee to get the strongest
1289 // guarantee available and then calls getProgressGuaranteesUpTo to get a list of
1290 // all guarantees that are either equal to the strongest guarantee or weaker
1291 // than it. The next 5 definitions follow the same model but for different
1292 // scopes.
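// Illustrative sketch (not part of this header; assumes a sycl::device Dev):
// the capability lists described above are queried per (thread scope,
// coordination scope) pair, e.g.
//
//   namespace syclex = sycl::ext::oneapi::experimental;
//   auto Caps = Dev.get_info<
//       syclex::info::device::work_group_progress_capabilities<
//           syclex::execution_scope::root_group>>();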
1293 template <typename ReturnT>
1294 struct get_device_info_impl<
1295  ReturnT,
1296  ext::oneapi::experimental::info::device::work_group_progress_capabilities<
1297  ext::oneapi::experimental::execution_scope::root_group>> {
1298  static ReturnT get(const DeviceImplPtr &Dev) {
1299  using execution_scope = ext::oneapi::experimental::execution_scope;
1300  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1301  Dev->getProgressGuarantee(execution_scope::work_group,
1302  execution_scope::root_group));
1303  }
1304 };
1305 template <typename ReturnT>
1306 struct get_device_info_impl<
1307  ReturnT,
1308  ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
1309  ext::oneapi::experimental::execution_scope::root_group>> {
1310  static ReturnT get(const DeviceImplPtr &Dev) {
1311  using execution_scope = ext::oneapi::experimental::execution_scope;
1312  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1313  Dev->getProgressGuarantee(execution_scope::sub_group,
1314  execution_scope::root_group));
1315  }
1316 };
1317 
1318 template <typename ReturnT>
1319 struct get_device_info_impl<
1320  ReturnT,
1321  ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
1322  ext::oneapi::experimental::execution_scope::work_group>> {
1323  static ReturnT get(const DeviceImplPtr &Dev) {
1324 
1325  using execution_scope = ext::oneapi::experimental::execution_scope;
1326  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1327  Dev->getProgressGuarantee(execution_scope::sub_group,
1328  execution_scope::work_group));
1329  }
1330 };
1331 
1332 template <typename ReturnT>
1333 struct get_device_info_impl<
1334  ReturnT,
1335  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1336  ext::oneapi::experimental::execution_scope::root_group>> {
1337  static ReturnT get(const DeviceImplPtr &Dev) {
1338 
1339  using execution_scope = ext::oneapi::experimental::execution_scope;
1340  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1341  Dev->getProgressGuarantee(execution_scope::work_item,
1342  execution_scope::root_group));
1343  }
1344 };
1345 template <typename ReturnT>
1346 struct get_device_info_impl<
1347  ReturnT,
1348  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1349  ext::oneapi::experimental::execution_scope::work_group>> {
1350  static ReturnT get(const DeviceImplPtr &Dev) {
1351 
1352  using execution_scope = ext::oneapi::experimental::execution_scope;
1353  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1354  Dev->getProgressGuarantee(execution_scope::work_item,
1355  execution_scope::work_group));
1356  }
1357 };
1358 
1359 template <typename ReturnT>
1360 struct get_device_info_impl<
1361  ReturnT,
1362  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1363  ext::oneapi::experimental::execution_scope::sub_group>> {
1364  static ReturnT get(const DeviceImplPtr &Dev) {
1365 
1366  using execution_scope = ext::oneapi::experimental::execution_scope;
1367  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1368  Dev->getProgressGuarantee(execution_scope::work_item,
1369  execution_scope::sub_group));
1370  }
1371 };
1372 
1373 } // namespace detail
1374 } // namespace _V1
1375 } // namespace sycl