DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_info.hpp
Go to the documentation of this file.
1 //==-------- device_info.hpp - SYCL device info methods --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 #include <detail/device_impl.hpp>
11 #include <detail/jit_compiler.hpp>
12 #include <detail/platform_impl.hpp>
13 #include <detail/platform_util.hpp>
14 #include <detail/plugin.hpp>
16 #include <sycl/detail/defines.hpp>
17 #include <sycl/detail/os_util.hpp>
18 #include <sycl/detail/pi.hpp>
19 #include <sycl/device.hpp>
22 #include <sycl/feature_test.hpp>
23 #include <sycl/info/info_desc.hpp>
24 #include <sycl/memory_enums.hpp>
25 #include <sycl/platform.hpp>
26 
27 #include <chrono>
28 #include <sstream>
29 #include <thread>
30 
31 #include "split_string.hpp"
32 
33 namespace sycl {
34 inline namespace _V1 {
35 namespace detail {
36 
37 inline std::vector<info::fp_config> read_fp_bitfield(pi_device_fp_config bits) {
38  std::vector<info::fp_config> result;
39  if (bits & PI_FP_DENORM)
40  result.push_back(info::fp_config::denorm);
41  if (bits & PI_FP_INF_NAN)
42  result.push_back(info::fp_config::inf_nan);
43  if (bits & PI_FP_ROUND_TO_NEAREST)
44  result.push_back(info::fp_config::round_to_nearest);
45  if (bits & PI_FP_ROUND_TO_ZERO)
46  result.push_back(info::fp_config::round_to_zero);
47  if (bits & PI_FP_ROUND_TO_INF)
48  result.push_back(info::fp_config::round_to_inf);
49  if (bits & PI_FP_FMA)
50  result.push_back(info::fp_config::fma);
51  if (bits & PI_FP_SOFT_FLOAT)
52  result.push_back(info::fp_config::soft_float);
55  return result;
56 }
57 
58 inline std::vector<info::partition_affinity_domain>
60  std::vector<info::partition_affinity_domain> result;
62  result.push_back(info::partition_affinity_domain::numa);
73  return result;
74 }
75 
76 inline std::vector<info::execution_capability>
78  std::vector<info::execution_capability> result;
79  if (bits & PI_EXEC_KERNEL)
81  if (bits & PI_EXEC_NATIVE_KERNEL)
83  return result;
84 }
85 
86 inline std::string
88  switch (AffinityDomain) {
89 #define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
90  case DOMAIN: \
91  return #DOMAIN;
92 
94  sycl::info::partition_affinity_domain::numa)
96  sycl::info::partition_affinity_domain::L4_cache)
98  sycl::info::partition_affinity_domain::L3_cache)
100  sycl::info::partition_affinity_domain::L2_cache)
102  sycl::info::partition_affinity_domain::L1_cache)
104  sycl::info::partition_affinity_domain::next_partitionable)
105 #undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
106  default:
107  assert(false && "Missing case for affinity domain.");
108  return "unknown";
109  }
110 }
111 
112 // Mapping expected SYCL return types to those returned by PI calls
113 template <typename T> struct sycl_to_pi {
114  using type = T;
115 };
116 template <> struct sycl_to_pi<bool> {
117  using type = pi_bool;
118 };
119 template <> struct sycl_to_pi<device> {
121 };
122 template <> struct sycl_to_pi<platform> {
124 };
125 
126 // Mapping fp_config device info types to the values used to check fp support
127 template <typename Param> struct check_fp_support {};
128 
129 template <> struct check_fp_support<info::device::half_fp_config> {
130  using type = info::device::native_vector_width_half;
131 };
132 
133 template <> struct check_fp_support<info::device::double_fp_config> {
134  using type = info::device::native_vector_width_double;
135 };
136 
137 // Structs for emulating function template partial specialization
138 // Default template for the general case
139 // TODO: get rid of remaining uses of OpenCL directly
140 //
141 template <typename ReturnT, typename Param> struct get_device_info_impl {
142  static ReturnT get(const DeviceImplPtr &Dev) {
143  typename sycl_to_pi<ReturnT>::type result;
144  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
145  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
146  nullptr);
147  return ReturnT(result);
148  }
149 };
150 
151 // Specialization for platform
152 template <typename Param> struct get_device_info_impl<platform, Param> {
153  static platform get(const DeviceImplPtr &Dev) {
154  typename sycl_to_pi<platform>::type result;
155  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
156  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
157  nullptr);
158  // TODO: Change PiDevice to device_impl.
159  // Use the Plugin from the device_impl class after plugin details
160  // are added to the class.
161  return createSyclObjFromImpl<platform>(
162  platform_impl::getOrMakePlatformImpl(result, Dev->getPlugin()));
163  }
164 };
165 
166 // Helper function to allow using the specialization of get_device_info_impl
167 // for string return type in other specializations.
169  sycl::detail::pi::PiDeviceInfo InfoCode) const {
170  size_t resultSize = 0;
171  getPlugin()->call<PiApiKind::piDeviceGetInfo>(getHandleRef(), InfoCode, 0,
172  nullptr, &resultSize);
173  if (resultSize == 0) {
174  return std::string();
175  }
176  std::unique_ptr<char[]> result(new char[resultSize]);
178  getHandleRef(), InfoCode, resultSize, result.get(), nullptr);
179 
180  return std::string(result.get());
181 }
182 
183 // Specialization for string return type, variable return size
184 template <typename Param> struct get_device_info_impl<std::string, Param> {
185  static std::string get(const DeviceImplPtr &Dev) {
186  return Dev->get_device_info_string(PiInfoCode<Param>::value);
187  }
188 };
189 
// Specialization for parent device.
// Declaration only: the definition is provided out of line, presumably after
// enough of device_impl is available to construct the returned object —
// TODO confirm against the rest of the file.
template <typename ReturnT>
struct get_device_info_impl<ReturnT, info::device::parent_device> {
  static ReturnT get(const DeviceImplPtr &Dev);
};
195 
196 // Specialization for fp_config types, checks the corresponding fp type support
197 template <typename Param>
198 struct get_device_info_impl<std::vector<info::fp_config>, Param> {
199  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
200  // Check if fp type is supported
203  typename check_fp_support<Param>::type>::get(Dev)) {
204  return {};
205  }
206  cl_device_fp_config result;
207  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
208  Dev->getHandleRef(), PiInfoCode<Param>::value, sizeof(result), &result,
209  nullptr);
210  return read_fp_bitfield(result);
211  }
212 };
213 
214 // Specialization for device version
215 template <> struct get_device_info_impl<std::string, info::device::version> {
216  static std::string get(const DeviceImplPtr &Dev) {
217  return Dev->get_device_info_string(
219  }
220 };
221 
222 // Specialization for single_fp_config, no type support check required
223 template <>
224 struct get_device_info_impl<std::vector<info::fp_config>,
225  info::device::single_fp_config> {
226  static std::vector<info::fp_config> get(const DeviceImplPtr &Dev) {
227  pi_device_fp_config result;
228  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
230  sizeof(result), &result, nullptr);
231  return read_fp_bitfield(result);
232  }
233 };
234 
235 // Specialization for queue_profiling. In addition to pi_queue level profiling,
236 // piGetDeviceAndHostTimer is not supported, command_submit, command_start,
237 // command_end will be calculated. See MFallbackProfiling
238 template <> struct get_device_info_impl<bool, info::device::queue_profiling> {
239  static bool get(const DeviceImplPtr &Dev) {
240  pi_queue_properties Properties;
241  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
243  sizeof(Properties), &Properties, nullptr);
244  return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE;
245  }
246 };
247 
248 // Specialization for atomic_memory_order_capabilities, PI returns a bitfield
249 template <>
250 struct get_device_info_impl<std::vector<memory_order>,
251  info::device::atomic_memory_order_capabilities> {
252  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
254  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
255  Dev->getHandleRef(),
257  sizeof(pi_memory_order_capabilities), &result, nullptr);
258  return readMemoryOrderBitfield(result);
259  }
260 };
261 
262 // Specialization for atomic_fence_order_capabilities, PI returns a bitfield
263 template <>
264 struct get_device_info_impl<std::vector<memory_order>,
265  info::device::atomic_fence_order_capabilities> {
266  static std::vector<memory_order> get(const DeviceImplPtr &Dev) {
268  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
269  Dev->getHandleRef(),
271  sizeof(pi_memory_order_capabilities), &result, nullptr);
272  return readMemoryOrderBitfield(result);
273  }
274 };
275 
276 // Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
277 template <>
278 struct get_device_info_impl<std::vector<memory_scope>,
279  info::device::atomic_memory_scope_capabilities> {
280  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
282  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
283  Dev->getHandleRef(),
285  sizeof(pi_memory_scope_capabilities), &result, nullptr);
286  return readMemoryScopeBitfield(result);
287  }
288 };
289 
290 // Specialization for atomic_fence_scope_capabilities, PI returns a bitfield
291 template <>
292 struct get_device_info_impl<std::vector<memory_scope>,
293  info::device::atomic_fence_scope_capabilities> {
294  static std::vector<memory_scope> get(const DeviceImplPtr &Dev) {
296  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
297  Dev->getHandleRef(),
299  sizeof(pi_memory_scope_capabilities), &result, nullptr);
300  return readMemoryScopeBitfield(result);
301  }
302 };
303 
304 // Specialization for bf16 math functions
305 template <>
307  info::device::ext_oneapi_bfloat16_math_functions> {
308  static bool get(const DeviceImplPtr &Dev) {
309  bool result = false;
310 
312  Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
313  Dev->getHandleRef(),
315  sizeof(result), &result, nullptr);
316  if (Err != PI_SUCCESS) {
317  return false;
318  }
319  return result;
320  }
321 };
322 
323 // Specialization for exec_capabilities, OpenCL returns a bitfield
324 template <>
325 struct get_device_info_impl<std::vector<info::execution_capability>,
326  info::device::execution_capabilities> {
327  static std::vector<info::execution_capability> get(const DeviceImplPtr &Dev) {
329  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
330  Dev->getHandleRef(),
332  &result, nullptr);
333  return read_execution_bitfield(result);
334  }
335 };
336 
337 // Specialization for built in kernel identifiers
338 template <>
339 struct get_device_info_impl<std::vector<kernel_id>,
340  info::device::built_in_kernel_ids> {
341  static std::vector<kernel_id> get(const DeviceImplPtr &Dev) {
342  std::string result = Dev->get_device_info_string(
344  auto names = split_string(result, ';');
345 
346  std::vector<kernel_id> ids;
347  ids.reserve(names.size());
348  for (const auto &name : names) {
349  ids.push_back(ProgramManager::getInstance().getBuiltInKernelID(name));
350  }
351  return ids;
352  }
353 };
354 
355 // Specialization for built in kernels, splits the string returned by OpenCL
356 template <>
357 struct get_device_info_impl<std::vector<std::string>,
358  info::device::built_in_kernels> {
359  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
360  std::string result = Dev->get_device_info_string(
362  return split_string(result, ';');
363  }
364 };
365 
366 // Specialization for extensions, splits the string returned by OpenCL
367 template <>
368 struct get_device_info_impl<std::vector<std::string>,
369  info::device::extensions> {
370  static std::vector<std::string> get(const DeviceImplPtr &Dev) {
371  std::string result =
373  return split_string(result, ' ');
374  }
375 };
376 
378  switch (PP) {
384  return true;
385  }
386  return false;
387 }
388 
389 // Specialization for partition properties, variable OpenCL return size
390 template <>
391 struct get_device_info_impl<std::vector<info::partition_property>,
392  info::device::partition_properties> {
393  static std::vector<info::partition_property> get(const DeviceImplPtr &Dev) {
395  const auto &Plugin = Dev->getPlugin();
396 
397  size_t resultSize;
398  Plugin->call<PiApiKind::piDeviceGetInfo>(
399  Dev->getHandleRef(), info_partition, 0, nullptr, &resultSize);
400 
401  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
402  if (arrayLength == 0) {
403  return {};
404  }
405  std::unique_ptr<cl_device_partition_property[]> arrayResult(
406  new cl_device_partition_property[arrayLength]);
407  Plugin->call<PiApiKind::piDeviceGetInfo>(Dev->getHandleRef(),
408  info_partition, resultSize,
409  arrayResult.get(), nullptr);
410 
411  std::vector<info::partition_property> result;
412  for (size_t i = 0; i < arrayLength; ++i) {
413  // OpenCL extensions may have partition_properties that
414  // are not yet defined for SYCL (eg. CL_DEVICE_PARTITION_BY_NAMES_INTEL)
416  static_cast<info::partition_property>(arrayResult[i]));
418  result.push_back(pp);
419  }
420  return result;
421  }
422 };
423 
424 // Specialization for partition affinity domains, OpenCL returns a bitfield
425 template <>
426 struct get_device_info_impl<std::vector<info::partition_affinity_domain>,
427  info::device::partition_affinity_domains> {
428  static std::vector<info::partition_affinity_domain>
429  get(const DeviceImplPtr &Dev) {
431  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
432  Dev->getHandleRef(),
434  sizeof(result), &result, nullptr);
435  return read_domain_bitfield(result);
436  }
437 };
438 
439 // Specialization for partition type affinity domain, OpenCL can return other
440 // partition properties instead
441 template <>
443  info::device::partition_type_affinity_domain> {
445  size_t resultSize;
446  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
447  Dev->getHandleRef(),
449  nullptr, &resultSize);
450  if (resultSize != 1) {
452  }
453  cl_device_partition_property result;
454  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
455  Dev->getHandleRef(),
457  sizeof(result), &result, nullptr);
458  if (result == PI_DEVICE_AFFINITY_DOMAIN_NUMA ||
463  return info::partition_affinity_domain(result);
464  }
465 
467  }
468 };
469 
470 // Specialization for partition type
471 template <>
473  info::device::partition_type_property> {
475  size_t resultSize;
476  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
477  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, 0, nullptr,
478  &resultSize);
479  if (!resultSize)
481 
482  size_t arrayLength = resultSize / sizeof(cl_device_partition_property);
483 
484  std::unique_ptr<cl_device_partition_property[]> arrayResult(
485  new cl_device_partition_property[arrayLength]);
486  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
487  Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, resultSize,
488  arrayResult.get(), nullptr);
489  if (!arrayResult[0])
491  return info::partition_property(arrayResult[0]);
492  }
493 };
494 // Specialization for supported subgroup sizes
495 template <>
496 struct get_device_info_impl<std::vector<size_t>,
497  info::device::sub_group_sizes> {
498  static std::vector<size_t> get(const DeviceImplPtr &Dev) {
499  size_t resultSize = 0;
500  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
502  0, nullptr, &resultSize);
503 
504  std::vector<uint32_t> result32(resultSize / sizeof(uint32_t));
505  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
507  resultSize, result32.data(), nullptr);
508 
509  std::vector<size_t> result;
510  result.reserve(result32.size());
511  for (uint32_t value : result32) {
512  result.push_back(value);
513  }
514  return result;
515  }
516 };
517 
518 // Specialization for kernel to kernel pipes.
519 // Here we step away from OpenCL, since there is no appropriate cl_device_info
520 // enum for global pipes feature.
521 template <>
522 struct get_device_info_impl<bool, info::device::kernel_kernel_pipe_support> {
523  static bool get(const DeviceImplPtr &Dev) {
524  // We claim, that all Intel FPGA devices support kernel to kernel pipe
525  // feature (at least at the scope of SYCL_INTEL_data_flow_pipes extension).
526  platform plt =
528  std::string platform_name = plt.get_info<info::platform::name>();
529  if (platform_name == "Intel(R) FPGA Emulation Platform for OpenCL(TM)" ||
530  platform_name == "Intel(R) FPGA SDK for OpenCL(TM)")
531  return true;
532 
533  // TODO: a better way is to query for supported SPIR-V capabilities when
534  // it's started to be possible. Also, if a device's backend supports
535  // SPIR-V 1.1 (where Pipe Storage feature was defined), than it supports
536  // the feature as well.
537  return false;
538  }
539 };
540 
541 template <int Dimensions>
542 range<Dimensions> construct_range(size_t *values) = delete;
543 // Due to the flipping of work group dimensions before kernel launch, the values
544 // should also be reversed.
545 template <> inline range<1> construct_range<1>(size_t *values) {
546  return {values[0]};
547 }
548 template <> inline range<2> construct_range<2>(size_t *values) {
549  return {values[1], values[0]};
550 }
551 template <> inline range<3> construct_range<3>(size_t *values) {
552  return {values[2], values[1], values[0]};
553 }
554 
555 // Specialization for max_work_item_sizes.
556 template <int Dimensions>
558  info::device::max_work_item_sizes<Dimensions>> {
559  static range<Dimensions> get(const DeviceImplPtr &Dev) {
560  size_t result[3];
561  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
562  Dev->getHandleRef(),
564  sizeof(result), &result, nullptr);
565  return construct_range<Dimensions>(result);
566  }
567 };
568 
570 
571 // Only for NVIDIA and AMD GPU architectures
572 constexpr std::pair<const char *, oneapi_exp_arch> NvidiaAmdGPUArchitectures[] =
573  {
574  {"5.0", oneapi_exp_arch::nvidia_gpu_sm_50},
575  {"5.2", oneapi_exp_arch::nvidia_gpu_sm_52},
576  {"5.3", oneapi_exp_arch::nvidia_gpu_sm_53},
577  {"6.0", oneapi_exp_arch::nvidia_gpu_sm_60},
578  {"6.1", oneapi_exp_arch::nvidia_gpu_sm_61},
579  {"6.2", oneapi_exp_arch::nvidia_gpu_sm_62},
580  {"7.0", oneapi_exp_arch::nvidia_gpu_sm_70},
581  {"7.2", oneapi_exp_arch::nvidia_gpu_sm_72},
582  {"7.5", oneapi_exp_arch::nvidia_gpu_sm_75},
583  {"8.0", oneapi_exp_arch::nvidia_gpu_sm_80},
584  {"8.6", oneapi_exp_arch::nvidia_gpu_sm_86},
585  {"8.7", oneapi_exp_arch::nvidia_gpu_sm_87},
586  {"8.9", oneapi_exp_arch::nvidia_gpu_sm_89},
587  {"9.0", oneapi_exp_arch::nvidia_gpu_sm_90},
588  {"gfx701", oneapi_exp_arch::amd_gpu_gfx701},
589  {"gfx702", oneapi_exp_arch::amd_gpu_gfx702},
590  {"gfx801", oneapi_exp_arch::amd_gpu_gfx801},
591  {"gfx802", oneapi_exp_arch::amd_gpu_gfx802},
592  {"gfx803", oneapi_exp_arch::amd_gpu_gfx803},
593  {"gfx805", oneapi_exp_arch::amd_gpu_gfx805},
594  {"gfx810", oneapi_exp_arch::amd_gpu_gfx810},
595  {"gfx900", oneapi_exp_arch::amd_gpu_gfx900},
596  {"gfx902", oneapi_exp_arch::amd_gpu_gfx902},
597  {"gfx904", oneapi_exp_arch::amd_gpu_gfx904},
598  {"gfx906", oneapi_exp_arch::amd_gpu_gfx906},
599  {"gfx908", oneapi_exp_arch::amd_gpu_gfx908},
600  {"gfx909", oneapi_exp_arch::amd_gpu_gfx909},
601  {"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a},
602  {"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c},
603  {"gfx940", oneapi_exp_arch::amd_gpu_gfx940},
604  {"gfx941", oneapi_exp_arch::amd_gpu_gfx941},
605  {"gfx942", oneapi_exp_arch::amd_gpu_gfx942},
606  {"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010},
607  {"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011},
608  {"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012},
609  {"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013},
610  {"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030},
611  {"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031},
612  {"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032},
613  {"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033},
614  {"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034},
615  {"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035},
616  {"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036},
617  {"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100},
618  {"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101},
619  {"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102},
620  {"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103},
621  {"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150},
622  {"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151},
623  {"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200},
624  {"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201},
625 };
626 
// Only for Intel GPU architectures.
// Keys are the integer device IP version reported by the Level Zero / OpenCL
// backends (queried below via the architecture info descriptor) — presumably
// the GMDID-style encoding; TODO confirm against the backend docs.
constexpr std::pair<const int, oneapi_exp_arch> IntelGPUArchitectures[] = {
    {0x02000000, oneapi_exp_arch::intel_gpu_bdw},
    {0x02400009, oneapi_exp_arch::intel_gpu_skl},
    {0x02404009, oneapi_exp_arch::intel_gpu_kbl},
    {0x02408009, oneapi_exp_arch::intel_gpu_cfl},
    {0x0240c000, oneapi_exp_arch::intel_gpu_apl},
    {0x02410000, oneapi_exp_arch::intel_gpu_glk},
    {0x02414000, oneapi_exp_arch::intel_gpu_whl},
    {0x02418000, oneapi_exp_arch::intel_gpu_aml},
    {0x0241c000, oneapi_exp_arch::intel_gpu_cml},
    {0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
    {0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
    {0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
    {0x03004000, oneapi_exp_arch::intel_gpu_rkl},
    {0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
    {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
    {0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
    {0x03028000, oneapi_exp_arch::intel_gpu_dg1},
    {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
    {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
    {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
    {0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
    {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg},
    {0x03118004, oneapi_exp_arch::intel_gpu_mtl_u},
    {0x0311c004, oneapi_exp_arch::intel_gpu_mtl_h},
    {0x03128004, oneapi_exp_arch::intel_gpu_arl_h},
    {0x05004004, oneapi_exp_arch::intel_gpu_bmg_g21},
    {0x05010004, oneapi_exp_arch::intel_gpu_lnl_m},
};
657 
// Only for Intel CPU architectures.
// Keys are the integer architecture ID the OpenCL CPU backend reports
// (queried below for is_cpu() + opencl); unmapped IDs fall back to x86_64.
constexpr std::pair<const int, oneapi_exp_arch> IntelCPUArchitectures[] = {
    {8, oneapi_exp_arch::intel_cpu_spr},
    {9, oneapi_exp_arch::intel_cpu_gnr},
};
663 
664 template <>
669  backend CurrentBackend = Dev->getBackend();
670  if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
671  backend::opencl == CurrentBackend)) {
672  auto MapArchIDToArchName = [](const int arch) {
673  for (const auto &Item : IntelGPUArchitectures) {
674  if (Item.first == arch)
675  return Item.second;
676  }
677  return ext::oneapi::experimental::architecture::unknown;
678  };
679  uint32_t DeviceIp;
680  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
681  Dev->getHandleRef(),
682  PiInfoCode<
684  sizeof(DeviceIp), &DeviceIp, nullptr);
685  return MapArchIDToArchName(DeviceIp);
686  } else if (Dev->is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend ||
687  backend::ext_oneapi_hip == CurrentBackend)) {
688  auto MapArchIDToArchName = [](const char *arch) {
689  for (const auto &Item : NvidiaAmdGPUArchitectures) {
690  if (std::string_view(Item.first) == arch)
691  return Item.second;
692  }
693  return ext::oneapi::experimental::architecture::unknown;
694  };
695  size_t ResultSize = 0;
696  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
697  Dev->getHandleRef(), PiInfoCode<info::device::version>::value, 0,
698  nullptr, &ResultSize);
699  std::unique_ptr<char[]> DeviceArch(new char[ResultSize]);
700  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
701  Dev->getHandleRef(), PiInfoCode<info::device::version>::value,
702  ResultSize, DeviceArch.get(), nullptr);
703  std::string DeviceArchCopy(DeviceArch.get());
704  std::string DeviceArchSubstr =
705  DeviceArchCopy.substr(0, DeviceArchCopy.find(":"));
706  return MapArchIDToArchName(DeviceArchSubstr.data());
707  } else if (Dev->is_cpu() && backend::opencl == CurrentBackend) {
708  auto MapArchIDToArchName = [](const int arch) {
709  for (const auto &Item : IntelCPUArchitectures) {
710  if (Item.first == arch)
711  return Item.second;
712  }
713  return sycl::ext::oneapi::experimental::architecture::x86_64;
714  };
715  uint32_t DeviceIp;
716  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
717  Dev->getHandleRef(),
718  PiInfoCode<
720  sizeof(DeviceIp), &DeviceIp, nullptr);
721  return MapArchIDToArchName(DeviceIp);
722  } // else is not needed
723  // TODO: add support of other architectures by extending with else if
724  return ext::oneapi::experimental::architecture::unknown;
725  }
726 };
727 
728 template <>
730  std::vector<ext::oneapi::experimental::matrix::combination>,
731  ext::oneapi::experimental::info::device::matrix_combinations> {
732  static std::vector<ext::oneapi::experimental::matrix::combination>
733  get(const DeviceImplPtr &Dev) {
734  using namespace ext::oneapi::experimental::matrix;
735  using namespace ext::oneapi::experimental;
736  backend CurrentBackend = Dev->getBackend();
737  auto get_current_architecture = [&Dev]() -> std::optional<architecture> {
738  // this helper lambda ignores all runtime-related exceptions from
739  // quering the device architecture. For instance, if device architecture
740  // on user's machine is not supported by
741  // sycl_ext_oneapi_device_architecture, the runtime exception is omitted,
742  // and std::nullopt is returned.
743  try {
744  return get_device_info_impl<
745  architecture,
747  } catch (sycl::exception &e) {
748  if (e.code() != errc::runtime)
749  std::rethrow_exception(std::make_exception_ptr(e));
750  }
751  return std::nullopt;
752  };
753  std::optional<architecture> DeviceArchOpt = get_current_architecture();
754  if (!DeviceArchOpt.has_value())
755  return {};
756  architecture DeviceArch = DeviceArchOpt.value();
757  if (architecture::intel_cpu_spr == DeviceArch)
758  return {
759  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
760  matrix_type::sint32, matrix_type::sint32},
761  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
762  matrix_type::sint32, matrix_type::sint32},
763  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
764  matrix_type::sint32, matrix_type::sint32},
765  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
766  matrix_type::sint32, matrix_type::sint32},
767  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
768  matrix_type::fp32, matrix_type::fp32},
769  };
770  else if (architecture::intel_cpu_gnr == DeviceArch)
771  return {
772  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
773  matrix_type::sint32, matrix_type::sint32},
774  {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
775  matrix_type::sint32, matrix_type::sint32},
776  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
777  matrix_type::sint32, matrix_type::sint32},
778  {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
779  matrix_type::sint32, matrix_type::sint32},
780  {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
781  matrix_type::fp32, matrix_type::fp32},
782  {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
783  matrix_type::fp32, matrix_type::fp32},
784  };
785  else if (architecture::intel_gpu_pvc == DeviceArch)
786  return {
787  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
788  matrix_type::sint32, matrix_type::sint32},
789  {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::sint8,
790  matrix_type::sint32, matrix_type::sint32},
791  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::uint8,
792  matrix_type::sint32, matrix_type::sint32},
793  {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::sint8,
794  matrix_type::sint32, matrix_type::sint32},
795  {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
796  matrix_type::fp32, matrix_type::fp32},
797  {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
798  matrix_type::fp32, matrix_type::fp32},
799  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
800  matrix_type::fp32, matrix_type::fp32},
801  {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
802  matrix_type::fp32, matrix_type::fp32},
803  {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
804  matrix_type::fp32, matrix_type::fp32},
805  {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
806  matrix_type::fp32, matrix_type::fp32},
807  };
808  else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
809  (architecture::intel_gpu_dg2_g11 == DeviceArch) ||
810  (architecture::intel_gpu_dg2_g12 == DeviceArch))
811  return {
812  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::uint8,
813  matrix_type::sint32, matrix_type::sint32},
814  {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::sint8,
815  matrix_type::sint32, matrix_type::sint32},
816  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::uint8,
817  matrix_type::sint32, matrix_type::sint32},
818  {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::sint8,
819  matrix_type::sint32, matrix_type::sint32},
820  {8, 0, 0, 0, 8, 16, matrix_type::fp16, matrix_type::fp16,
821  matrix_type::fp32, matrix_type::fp32},
822  {8, 0, 0, 0, 8, 16, matrix_type::bf16, matrix_type::bf16,
823  matrix_type::fp32, matrix_type::fp32},
824  };
825  else if (architecture::amd_gpu_gfx90a == DeviceArch)
826  return {
827  {0, 0, 0, 32, 32, 8, matrix_type::fp16, matrix_type::fp16,
828  matrix_type::fp32, matrix_type::fp32},
829  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
830  matrix_type::fp32, matrix_type::fp32},
831  {0, 0, 0, 32, 32, 8, matrix_type::sint8, matrix_type::sint8,
832  matrix_type::sint32, matrix_type::sint32},
833  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
834  matrix_type::sint32, matrix_type::sint32},
835  {0, 0, 0, 32, 32, 8, matrix_type::bf16, matrix_type::bf16,
836  matrix_type::fp32, matrix_type::fp32},
837  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
838  matrix_type::fp32, matrix_type::fp32},
839  {0, 0, 0, 16, 16, 4, matrix_type::fp64, matrix_type::fp64,
840  matrix_type::fp64, matrix_type::fp64},
841  };
842  else if (backend::ext_oneapi_cuda == CurrentBackend) {
843  // TODO: Tho following can be simplified when comparison of architectures
844  // using < and > will be implemented
846  constexpr std::pair<float, oneapi_exp_arch> NvidiaArchNumbs[] = {
847  {5.0, oneapi_exp_arch::nvidia_gpu_sm_50},
848  {5.2, oneapi_exp_arch::nvidia_gpu_sm_52},
849  {5.3, oneapi_exp_arch::nvidia_gpu_sm_53},
850  {6.0, oneapi_exp_arch::nvidia_gpu_sm_60},
851  {6.1, oneapi_exp_arch::nvidia_gpu_sm_61},
852  {6.2, oneapi_exp_arch::nvidia_gpu_sm_62},
853  {7.0, oneapi_exp_arch::nvidia_gpu_sm_70},
854  {7.2, oneapi_exp_arch::nvidia_gpu_sm_72},
855  {7.5, oneapi_exp_arch::nvidia_gpu_sm_75},
856  {8.0, oneapi_exp_arch::nvidia_gpu_sm_80},
857  {8.6, oneapi_exp_arch::nvidia_gpu_sm_86},
858  {8.7, oneapi_exp_arch::nvidia_gpu_sm_87},
859  {8.9, oneapi_exp_arch::nvidia_gpu_sm_89},
860  {9.0, oneapi_exp_arch::nvidia_gpu_sm_90},
861  };
862  auto GetArchNum = [&](const architecture &arch) {
863  for (const auto &Item : NvidiaArchNumbs)
864  if (Item.second == arch)
865  return Item.first;
866  return 0.f;
867  };
868  float ComputeCapability = GetArchNum(DeviceArch);
869  std::vector<combination> sm_70_combinations = {
870  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
871  matrix_type::fp32, matrix_type::fp32},
872  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
873  matrix_type::fp32, matrix_type::fp32},
874  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
875  matrix_type::fp32, matrix_type::fp32},
876  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
877  matrix_type::fp16, matrix_type::fp16},
878  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
879  matrix_type::fp16, matrix_type::fp16},
880  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
881  matrix_type::fp16, matrix_type::fp16},
882  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
883  matrix_type::fp32, matrix_type::fp16},
884  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
885  matrix_type::fp32, matrix_type::fp16},
886  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
887  matrix_type::fp32, matrix_type::fp16},
888  {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
889  matrix_type::fp16, matrix_type::fp32},
890  {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
891  matrix_type::fp16, matrix_type::fp32},
892  {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
893  matrix_type::fp16, matrix_type::fp32}};
894  std::vector<combination> sm_72_combinations = {
895  {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
896  matrix_type::sint32, matrix_type::sint32},
897  {0, 0, 0, 8, 32, 16, matrix_type::sint8, matrix_type::sint8,
898  matrix_type::sint32, matrix_type::sint32},
899  {0, 0, 0, 32, 8, 16, matrix_type::sint8, matrix_type::sint8,
900  matrix_type::sint32, matrix_type::sint32},
901  {0, 0, 0, 16, 16, 16, matrix_type::uint8, matrix_type::uint8,
902  matrix_type::sint32, matrix_type::sint32},
903  {0, 0, 0, 8, 32, 16, matrix_type::uint8, matrix_type::uint8,
904  matrix_type::sint32, matrix_type::sint32},
905  {0, 0, 0, 32, 8, 16, matrix_type::uint8, matrix_type::uint8,
906  matrix_type::sint32, matrix_type::sint32}};
907  std::vector<combination> sm_80_combinations = {
908  {0, 0, 0, 16, 16, 8, matrix_type::tf32, matrix_type::tf32,
909  matrix_type::fp32, matrix_type::fp32},
910  {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
911  matrix_type::fp32, matrix_type::fp32},
912  {0, 0, 0, 8, 32, 16, matrix_type::bf16, matrix_type::bf16,
913  matrix_type::fp32, matrix_type::fp32},
914  {0, 0, 0, 32, 8, 16, matrix_type::bf16, matrix_type::bf16,
915  matrix_type::fp32, matrix_type::fp32},
916  {0, 0, 0, 8, 8, 4, matrix_type::fp64, matrix_type::fp64,
917  matrix_type::fp64, matrix_type::fp64}};
918  if (ComputeCapability >= 8.0) {
919  sm_80_combinations.insert(sm_80_combinations.end(),
920  sm_72_combinations.begin(),
921  sm_72_combinations.end());
922  sm_80_combinations.insert(sm_80_combinations.end(),
923  sm_70_combinations.begin(),
924  sm_70_combinations.end());
925  return sm_80_combinations;
926  } else if (ComputeCapability >= 7.2) {
927  sm_72_combinations.insert(sm_72_combinations.end(),
928  sm_70_combinations.begin(),
929  sm_70_combinations.end());
930  return sm_72_combinations;
931  } else if (ComputeCapability >= 7.0)
932  return sm_70_combinations;
933  }
934  return {};
935  }
936 };
937 
938 template <>
940  size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
941  static size_t get(const DeviceImplPtr) {
942  return static_cast<size_t>((std::numeric_limits<int>::max)());
943  }
944 };
945 template <>
947  id<1>, ext::oneapi::experimental::info::device::max_work_groups<1>> {
948  static id<1> get(const DeviceImplPtr &Dev) {
949  size_t result[3];
950  size_t Limit =
951  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
952  max_global_work_groups>::get(Dev);
953  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
954  Dev->getHandleRef(),
955  PiInfoCode<
957  sizeof(result), &result, nullptr);
958  return id<1>(std::min(Limit, result[0]));
959  }
960 };
961 
962 template <>
964  id<2>, ext::oneapi::experimental::info::device::max_work_groups<2>> {
965  static id<2> get(const DeviceImplPtr &Dev) {
966  size_t result[3];
967  size_t Limit =
968  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
969  max_global_work_groups>::get(Dev);
970  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
971  Dev->getHandleRef(),
972  PiInfoCode<
974  sizeof(result), &result, nullptr);
975  return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0]));
976  }
977 };
978 
979 template <>
981  id<3>, ext::oneapi::experimental::info::device::max_work_groups<3>> {
982  static id<3> get(const DeviceImplPtr &Dev) {
983  size_t result[3];
984  size_t Limit =
985  get_device_info_impl<size_t, ext::oneapi::experimental::info::device::
986  max_global_work_groups>::get(Dev);
987  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
988  Dev->getHandleRef(),
989  PiInfoCode<
991  sizeof(result), &result, nullptr);
992  return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
993  std::min(Limit, result[0]));
994  }
995 };
996 
997 // TODO:Remove with deprecated feature
998 // device::get_info<info::device::ext_oneapi_max_global_work_groups>
999 template <>
1000 struct get_device_info_impl<size_t,
1001  info::device::ext_oneapi_max_global_work_groups> {
1002  static size_t get(const DeviceImplPtr &Dev) {
1003  return get_device_info_impl<size_t,
1004  ext::oneapi::experimental::info::device::
1005  max_global_work_groups>::get(Dev);
1006  }
1007 };
1008 
1009 // TODO:Remove with deprecated feature
1010 // device::get_info<info::device::ext_oneapi_max_work_groups_1d>
1011 template <>
1013  info::device::ext_oneapi_max_work_groups_1d> {
1014  static id<1> get(const DeviceImplPtr &Dev) {
1015  return get_device_info_impl<
1016  id<1>,
1018  }
1019 };
1020 
1021 // TODO:Remove with deprecated feature
1022 // device::get_info<info::device::ext_oneapi_max_work_groups_2d>
1023 template <>
1025  info::device::ext_oneapi_max_work_groups_2d> {
1026  static id<2> get(const DeviceImplPtr &Dev) {
1027  return get_device_info_impl<
1028  id<2>,
1030  }
1031 };
1032 
1033 // TODO:Remove with deprecated feature
1034 // device::get_info<info::device::ext_oneapi_max_work_groups_3d>
1035 template <>
1037  info::device::ext_oneapi_max_work_groups_3d> {
1038  static id<3> get(const DeviceImplPtr &Dev) {
1039  return get_device_info_impl<
1040  id<3>,
1042  }
1043 };
1044 
1045 // Specialization for parent device
1046 template <> struct get_device_info_impl<device, info::device::parent_device> {
1047  static device get(const DeviceImplPtr &Dev) {
1048  typename sycl_to_pi<device>::type result;
1049  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1051  sizeof(result), &result, nullptr);
1052  if (result == nullptr)
1053  throw invalid_object_error(
1054  "No parent for device because it is not a subdevice",
1055  PI_ERROR_INVALID_DEVICE);
1056 
1057  const auto &Platform = Dev->getPlatformImpl();
1058  return createSyclObjFromImpl<device>(
1059  Platform->getOrMakeDeviceImpl(result, Platform));
1060  }
1061 };
1062 
1063 // Specialization for image_support
template <> struct get_device_info_impl<bool, info::device::image_support> {
  // The device argument is unused: the answer is uniform across all devices.
  static bool get(const DeviceImplPtr &) {
    // No devices currently support SYCL 2020 images.
    return false;
  }
};
1070 
1071 // USM
1072 
1073 // Specialization for device usm query.
1074 template <>
1075 struct get_device_info_impl<bool, info::device::usm_device_allocations> {
1076  static bool get(const DeviceImplPtr &Dev) {
1077  pi_usm_capabilities caps;
1078  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1079  Dev->getHandleRef(),
1081  sizeof(pi_usm_capabilities), &caps, nullptr);
1082 
1083  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1084  }
1085 };
1086 
1087 // Specialization for host usm query.
1088 template <>
1089 struct get_device_info_impl<bool, info::device::usm_host_allocations> {
1090  static bool get(const DeviceImplPtr &Dev) {
1091  pi_usm_capabilities caps;
1092  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1093  Dev->getHandleRef(),
1095  sizeof(pi_usm_capabilities), &caps, nullptr);
1096 
1097  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1098  }
1099 };
1100 
1101 // Specialization for shared usm query.
1102 template <>
1103 struct get_device_info_impl<bool, info::device::usm_shared_allocations> {
1104  static bool get(const DeviceImplPtr &Dev) {
1105  pi_usm_capabilities caps;
1106  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1107  Dev->getHandleRef(),
1109  sizeof(pi_usm_capabilities), &caps, nullptr);
1110  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1111  }
1112 };
1113 
1114 // Specialization for restricted usm query
1115 template <>
1117  info::device::usm_restricted_shared_allocations> {
1118  static bool get(const DeviceImplPtr &Dev) {
1119  pi_usm_capabilities caps;
1120  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1121  Dev->getHandleRef(),
1123  sizeof(pi_usm_capabilities), &caps, nullptr);
1124  // Check that we don't support any cross device sharing
1125  return (Err != PI_SUCCESS)
1126  ? false
1127  : !(caps & (PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS));
1128  }
1129 };
1130 
1131 // Specialization for system usm query
1132 template <>
1133 struct get_device_info_impl<bool, info::device::usm_system_allocations> {
1134  static bool get(const DeviceImplPtr &Dev) {
1135  pi_usm_capabilities caps;
1136  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1137  Dev->getHandleRef(),
1139  sizeof(pi_usm_capabilities), &caps, nullptr);
1140  return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
1141  }
1142 };
1143 
1144 // Specialization for kernel fusion support
1145 template <>
1147  bool, ext::codeplay::experimental::info::device::supports_fusion> {
1148  static bool get(const DeviceImplPtr &Dev) {
1149 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1150  // If the JIT library can't be loaded or entry points in the JIT library
1151  // can't be resolved, fusion is not available.
1152  if (!jit_compiler::get_instance().isAvailable()) {
1153  return false;
1154  }
1155  // Currently fusion is only supported for SPIR-V based backends,
1156  // CUDA and HIP.
1157  if (Dev->getBackend() == backend::opencl) {
1158  // Exclude all non-CPU or non-GPU devices on OpenCL, in particular
1159  // accelerators.
1160  return Dev->is_cpu() || Dev->is_gpu();
1161  }
1162 
1163  return (Dev->getBackend() == backend::ext_oneapi_level_zero) ||
1164  (Dev->getBackend() == backend::ext_oneapi_cuda) ||
1165  (Dev->getBackend() == backend::ext_oneapi_hip);
1166 #else // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1167  (void)Dev;
1168  return false;
1169 #endif // SYCL_EXT_CODEPLAY_KERNEL_FUSION
1170  }
1171 };
1172 
1173 // Specialization for max registers per work-group
1174 template <>
1176  uint32_t,
1177  ext::codeplay::experimental::info::device::max_registers_per_work_group> {
1178  static uint32_t get(const DeviceImplPtr &Dev) {
1179  uint32_t maxRegsPerWG;
1180  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1181  Dev->getHandleRef(),
1182  PiInfoCode<ext::codeplay::experimental::info::device::
1183  max_registers_per_work_group>::value,
1184  sizeof(maxRegsPerWG), &maxRegsPerWG, nullptr);
1185  return maxRegsPerWG;
1186  }
1187 };
1188 
1189 // Specialization for composite devices extension.
1190 template <>
1192  std::vector<sycl::device>,
1193  ext::oneapi::experimental::info::device::component_devices> {
1194  static std::vector<sycl::device> get(const DeviceImplPtr &Dev) {
1195  if (Dev->getBackend() != backend::ext_oneapi_level_zero)
1196  return {};
1197  size_t ResultSize = 0;
1198  // First call to get DevCount.
1199  pi_result Err = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
1200  Dev->getHandleRef(),
1201  PiInfoCode<
1202  ext::oneapi::experimental::info::device::component_devices>::value,
1203  0, nullptr, &ResultSize);
1204 
1205  // If the feature is unsupported or if the result was empty, return an empty
1206  // list of devices.
1207  if (Err == PI_ERROR_INVALID_VALUE || (Err == PI_SUCCESS && ResultSize == 0))
1208  return {};
1209 
1210  // Otherwise, if there was an error from PI it is unexpected and we should
1211  // handle it accordingly.
1212  Dev->getPlugin()->checkPiResult(Err);
1213 
1214  size_t DevCount = ResultSize / sizeof(pi_device);
1215  // Second call to get the list.
1216  std::vector<pi_device> Devs(DevCount);
1217  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1218  Dev->getHandleRef(),
1219  PiInfoCode<
1220  ext::oneapi::experimental::info::device::component_devices>::value,
1221  ResultSize, Devs.data(), nullptr);
1222  std::vector<sycl::device> Result;
1223  const auto &Platform = Dev->getPlatformImpl();
1224  for (const auto &d : Devs)
1225  Result.push_back(createSyclObjFromImpl<device>(
1226  Platform->getOrMakeDeviceImpl(d, Platform)));
1227 
1228  return Result;
1229  }
1230 };
1231 template <>
1233  sycl::device, ext::oneapi::experimental::info::device::composite_device> {
1234  static sycl::device get(const DeviceImplPtr &Dev) {
1235  if (!Dev->has(sycl::aspect::ext_oneapi_is_component))
1237  "Only devices with aspect::ext_oneapi_is_component "
1238  "can call this function.");
1239 
1240  typename sycl_to_pi<device>::type Result;
1241  Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
1242  Dev->getHandleRef(),
1243  PiInfoCode<
1244  ext::oneapi::experimental::info::device::composite_device>::value,
1245  sizeof(Result), &Result, nullptr);
1246 
1247  if (Result) {
1248  const auto &Platform = Dev->getPlatformImpl();
1249  return createSyclObjFromImpl<device>(
1250  Platform->getOrMakeDeviceImpl(Result, Platform));
1251  }
1253  "A component with aspect::ext_oneapi_is_component "
1254  "must have a composite device.");
1255  }
1256 };
1257 
1258 template <typename Param>
1259 typename Param::return_type get_device_info(const DeviceImplPtr &Dev) {
1260  static_assert(is_device_info_desc<Param>::value,
1261  "Invalid device information descriptor");
1262  if (std::is_same<Param,
1263  sycl::_V1::ext::intel::info::device::free_memory>::value) {
1264  if (!Dev->has(aspect::ext_intel_free_memory))
1265  throw invalid_object_error(
1266  "The device does not have the ext_intel_free_memory aspect",
1267  PI_ERROR_INVALID_DEVICE);
1268  }
1270 }
1271 
1272 // SYCL host device information
1273 
1274 // Default template is disabled, all possible instantiations are
1275 // specified explicitly.
// Primary template is deleted: every descriptor supported on the host device
// must provide an explicit specialization below; any other descriptor fails
// at compile time.
template <typename Param>
inline typename Param::return_type get_device_info_host() = delete;

// The host device advertises no optional aspects.
template <>
inline std::vector<sycl::aspect> get_device_info_host<info::device::aspects>() {
  return std::vector<sycl::aspect>();
}
1283 
1284 template <>
1286 get_device_info_host<ext::oneapi::experimental::info::device::architecture>() {
1287  return ext::oneapi::experimental::architecture::x86_64;
1288 }
1289 
template <>
inline info::device_type get_device_info_host<info::device::device_type>() {
  return info::device_type::host;
}

// 0x8086 is Intel's PCI vendor ID.
template <> inline uint32_t get_device_info_host<info::device::vendor_id>() {
  return 0x8086;
}

// One compute unit per hardware thread. Note hardware_concurrency() may
// return 0 when the value is not computable (std::thread contract).
template <>
inline uint32_t get_device_info_host<info::device::max_compute_units>() {
  return std::thread::hardware_concurrency();
}

template <>
inline uint32_t get_device_info_host<info::device::max_work_item_dimensions>() {
  return 3;
}

template <>
inline range<1> get_device_info_host<info::device::max_work_item_sizes<1>>() {
  // current value is the required minimum
  return {1};
}

template <>
inline range<2> get_device_info_host<info::device::max_work_item_sizes<2>>() {
  // current value is the required minimum
  return {1, 1};
}

template <>
inline range<3> get_device_info_host<info::device::max_work_item_sizes<3>>() {
  // current value is the required minimum
  return {1, 1, 1};
}

template <>
inline constexpr size_t get_device_info_host<
    ext::oneapi::experimental::info::device::max_global_work_groups>() {
  // See handler.hpp for the maximum value :
  // Extra parentheses around max defeat any max() function-style macro.
  return static_cast<size_t>((std::numeric_limits<int>::max)());
}
1333 
1334 template <>
1337  // See handler.hpp for the maximum value :
1338  static constexpr size_t Limit = get_device_info_host<
1339  ext::oneapi::experimental::info::device::max_global_work_groups>();
1340  return {Limit};
1341 }
1342 
template <>
inline id<2> get_device_info_host<
    ext::oneapi::experimental::info::device::max_work_groups<2>>() {
  // See handler.hpp for the maximum value :
  static constexpr size_t Limit = get_device_info_host<
      ext::oneapi::experimental::info::device::max_global_work_groups>();
  return {Limit, Limit};
}

template <>
inline id<3> get_device_info_host<
    ext::oneapi::experimental::info::device::max_work_groups<3>>() {
  // See handler.hpp for the maximum value :
  static constexpr size_t Limit = get_device_info_host<
      ext::oneapi::experimental::info::device::max_global_work_groups>();
  return {Limit, Limit, Limit};
}

// TODO:remove with deprecated feature
// device::get_info<info::device::ext_oneapi_max_global_work_groups>
// Deprecated alias: forwards to the experimental descriptor above.
template <>
inline constexpr size_t
get_device_info_host<info::device::ext_oneapi_max_global_work_groups>() {
  return get_device_info_host<
      ext::oneapi::experimental::info::device::max_global_work_groups>();
}
1369 
1370 // TODO:remove with deprecated feature
1371 // device::get_info<info::device::ext_oneapi_max_work_groups_1d>
1372 template <>
1373 inline id<1>
1374 get_device_info_host<info::device::ext_oneapi_max_work_groups_1d>() {
1375 
1376  return get_device_info_host<
1378 }
1379 
1380 // TODO:remove with deprecated feature
1381 // device::get_info<info::device::ext_oneapi_max_work_groups_2d>
1382 template <>
1383 inline id<2>
1384 get_device_info_host<info::device::ext_oneapi_max_work_groups_2d>() {
1385  return get_device_info_host<
1387 }
1388 
// TODO:remove with deprecated feature
// device::get_info<info::device::ext_oneapi_max_work_groups_3d>
// Deprecated alias: forwards to the experimental 3D descriptor.
template <>
inline id<3>
get_device_info_host<info::device::ext_oneapi_max_work_groups_3d>() {
  return get_device_info_host<
      ext::oneapi::experimental::info::device::max_work_groups<3>>();
}

template <>
inline size_t get_device_info_host<info::device::max_work_group_size>() {
  // current value is the required minimum
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_char>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_short>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_int>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_long>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_float>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_double>() {
  // TODO update when appropriate
  return 1;
}

template <>
inline uint32_t
get_device_info_host<info::device::preferred_vector_width_half>() {
  // TODO update when appropriate
  // NOTE(review): width 0 presumably indicates half is unsupported on host —
  // matches the SYCL convention for unsupported types; confirm.
  return 0;
}
1452 
1453 template <>
1454 inline uint32_t get_device_info_host<info::device::native_vector_width_char>() {
1456 }
1457 
1458 template <>
1459 inline uint32_t
1460 get_device_info_host<info::device::native_vector_width_short>() {
1462 }
1463 
1464 template <>
1465 inline uint32_t get_device_info_host<info::device::native_vector_width_int>() {
1467 }
1468 
1469 template <>
1470 inline uint32_t get_device_info_host<info::device::native_vector_width_long>() {
1472 }
1473 
1474 template <>
1475 inline uint32_t
1476 get_device_info_host<info::device::native_vector_width_float>() {
1478 }
1479 
1480 template <>
1481 inline uint32_t
1482 get_device_info_host<info::device::native_vector_width_double>() {
1484 }
1485 
1486 template <>
1487 inline uint32_t get_device_info_host<info::device::native_vector_width_half>() {
1489 }
1490 
1491 template <>
1492 inline uint32_t get_device_info_host<info::device::max_clock_frequency>() {
1494 }
1495 
// Address width of the host process (32 or 64 bits).
template <> inline uint32_t get_device_info_host<info::device::address_bits>() {
  return sizeof(void *) * 8;
}

// Global memory on the host device is the OS-visible physical memory.
template <>
inline uint64_t get_device_info_host<info::device::global_mem_size>() {
  return static_cast<uint64_t>(OSUtil::getOSMemSize());
}

template <>
inline uint64_t get_device_info_host<info::device::max_mem_alloc_size>() {
  // current value is the required minimum: max(global_mem_size / 4, 128 MiB)
  const uint64_t a = get_device_info_host<info::device::global_mem_size>() / 4;
  const uint64_t b = 128ul * 1024 * 1024;
  return (a > b) ? a : b;
}

// NOTE: unlike the device-side specialization above, the host device
// reports image support.
template <> inline bool get_device_info_host<info::device::image_support>() {
  return true;
}

template <> inline bool get_device_info_host<info::device::atomic64>() {
  return false;
}
1520 
1521 template <>
1522 inline std::vector<memory_order>
1523 get_device_info_host<info::device::atomic_memory_order_capabilities>() {
1526 }
1527 
1528 template <>
1529 inline std::vector<memory_order>
1530 get_device_info_host<info::device::atomic_fence_order_capabilities>() {
1533 }
1534 
1535 template <>
1536 inline std::vector<memory_scope>
1537 get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
1540 }
1541 
1542 template <>
1543 inline std::vector<memory_scope>
1544 get_device_info_host<info::device::atomic_fence_scope_capabilities>() {
1547 }
1548 
// The bfloat16 math-functions extension is not available on the host device.
template <>
inline bool
get_device_info_host<info::device::ext_oneapi_bfloat16_math_functions>() {
  return false;
}

template <>
inline uint32_t get_device_info_host<info::device::max_read_image_args>() {
  // current value is the required minimum
  return 128;
}

template <>
inline uint32_t get_device_info_host<info::device::max_write_image_args>() {
  // current value is the required minimum
  return 8;
}
1566 
1567 template <>
1568 inline size_t get_device_info_host<info::device::image2d_max_width>() {
1569  // SYCL guarantees at least 8192. Some devices already known to provide more
1570  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1571  // image object allocation on host.
1572  // Using any fixed number (i.e. 16384) brings the risk of having similar
1573  // issues on newer devices in future. Thus it does not make sense limiting
1574  // the returned value on host. Practically speaking the returned value on host
1575  // depends only on memory required for the image, which also depends on
1576  // the image channel_type and the image height. Both are not known in this
1577  // query, thus it becomes user's responsibility to choose proper image
1578  // parameters depending on similar query to (non-host device) and amount
1579  // of available/allocatable memory.
1581 }
1582 
1583 template <>
1584 inline size_t get_device_info_host<info::device::image2d_max_height>() {
1585  // SYCL guarantees at least 8192. Some devices already known to provide more
1586  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1587  // image object allocation on host.
1588  // Using any fixed number (i.e. 16384) brings the risk of having similar
1589  // issues on newer devices in future. Thus it does not make sense limiting
1590  // the returned value on host. Practically speaking the returned value on host
1591  // depends only on memory required for the image, which also depends on
1592  // the image channel_type and the image width. Both are not known in this
1593  // query, thus it becomes user's responsibility to choose proper image
1594  // parameters depending on similar query to (non-host device) and amount
1595  // of available/allocatable memory.
1597 }
1598 
1599 template <>
1600 inline size_t get_device_info_host<info::device::image3d_max_width>() {
1601  // SYCL guarantees at least 8192. Some devices already known to provide more
1602  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1603  // image object allocation on host.
1604  // Using any fixed number (i.e. 16384) brings the risk of having similar
1605  // issues on newer devices in future. Thus it does not make sense limiting
1606  // the returned value on host. Practically speaking the returned value on host
1607  // depends only on memory required for the image, which also depends on
1608  // the image channel_type and the image height/depth. Both are not known
1609  // in this query, thus it becomes user's responsibility to choose proper image
1610  // parameters depending on similar query to (non-host device) and amount
1611  // of available/allocatable memory.
1613 }
1614 
1615 template <>
1616 inline size_t get_device_info_host<info::device::image3d_max_height>() {
1617  // SYCL guarantees at least 8192. Some devices already known to provide more
1618  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1619  // image object allocation on host.
1620  // Using any fixed number (i.e. 16384) brings the risk of having similar
1621  // issues on newer devices in future. Thus it does not make sense limiting
1622  // the returned value on host. Practically speaking the returned value on host
1623  // depends only on memory required for the image, which also depends on
1624  // the image channel_type and the image width/depth. Both are not known
1625  // in this query, thus it becomes user's responsibility to choose proper image
1626  // parameters depending on similar query to (non-host device) and amount
1627  // of available/allocatable memory.
1629 }
1630 
1631 template <>
1632 inline size_t get_device_info_host<info::device::image3d_max_depth>() {
1633  // SYCL guarantees at least 8192. Some devices already known to provide more
1634  // than that (i.e. it is 16384 for opencl:gpu), which may create issues during
1635  // image object allocation on host.
1636  // Using any fixed number (i.e. 16384) brings the risk of having similar
1637  // issues on newer devices in future. Thus it does not make sense limiting
1638  // the returned value on host. Practically speaking the returned value on host
1639  // depends only on memory required for the image, which also depends on
1640  // the image channel_type and the image height/width, which are not known
1641  // in this query, thus it becomes user's responsibility to choose proper image
1642  // parameters depending on similar query to (non-host device) and amount
1643  // of available/allocatable memory.
1645 }
1646 
template <>
inline size_t get_device_info_host<info::device::image_max_buffer_size>() {
  // Not supported in SYCL
  return 0;
}

template <>
inline size_t get_device_info_host<info::device::image_max_array_size>() {
  // current value is the required minimum
  return 2048;
}

template <> inline uint32_t get_device_info_host<info::device::max_samplers>() {
  // current value is the required minimum
  return 16;
}

template <>
inline size_t get_device_info_host<info::device::max_parameter_size>() {
  // current value is the required minimum
  return 1024;
}

// Alignment of the base address of allocated memory, in bits.
template <>
inline uint32_t get_device_info_host<info::device::mem_base_addr_align>() {
  return 1024;
}

// Empty list: no half-precision FP capabilities are advertised.
template <>
inline std::vector<info::fp_config>
get_device_info_host<info::device::half_fp_config>() {
  // current value is the required minimum
  return {};
}
1681 
1682 template <>
1683 inline std::vector<info::fp_config>
1684 get_device_info_host<info::device::single_fp_config>() {
1685  // current value is the required minimum
1687 }
1688 
1689 template <>
1690 inline std::vector<info::fp_config>
1691 get_device_info_host<info::device::double_fp_config>() {
1692  // current value is the required minimum
1696 }
1697 
1698 template <>
1700 get_device_info_host<info::device::global_mem_cache_type>() {
1702 }
1703 
1704 template <>
1705 inline uint32_t
1706 get_device_info_host<info::device::global_mem_cache_line_size>() {
1708 }
1709 
1710 template <>
1711 inline uint64_t get_device_info_host<info::device::global_mem_cache_size>() {
1713 }
1714 
template <>
inline uint64_t get_device_info_host<info::device::max_constant_buffer_size>() {
  // current value is the required minimum (64 KiB)
  return 64 * 1024;
}

template <>
inline uint32_t get_device_info_host<info::device::max_constant_args>() {
  // current value is the required minimum
  return 8;
}
1726 
1727 template <>
1729 get_device_info_host<info::device::local_mem_type>() {
1731 }
1732 
template <>
inline uint64_t get_device_info_host<info::device::local_mem_size>() {
  // current value is the required minimum (32 KiB)
  return 32 * 1024;
}

template <>
inline bool get_device_info_host<info::device::error_correction_support>() {
  return false;
}

// The host device trivially shares memory with the host.
template <>
inline bool get_device_info_host<info::device::host_unified_memory>() {
  return true;
}
1748 
1749 template <>
1750 inline size_t get_device_info_host<info::device::profiling_timer_resolution>() {
1751  typedef std::ratio_divide<std::chrono::high_resolution_clock::period,
1752  std::nano>
1753  ns_period;
1754  return ns_period::num / ns_period::den;
1755 }
1756 
1757 template <> inline bool get_device_info_host<info::device::is_endian_little>() {
1758  union {
1759  uint16_t a;
1760  uint8_t b[2];
1761  } u = {0x0100};
1762 
1763  return u.b[1];
1764 }
1765 
// The host device is always present and usable.
template <> inline bool get_device_info_host<info::device::is_available>() {
  return true;
}

template <>
inline bool get_device_info_host<info::device::is_compiler_available>() {
  return true;
}

template <>
inline bool get_device_info_host<info::device::is_linker_available>() {
  return true;
}
1779 
1780 template <>
1781 inline std::vector<info::execution_capability>
1782 get_device_info_host<info::device::execution_capabilities>() {
1784 }
1785 
template <> inline bool get_device_info_host<info::device::queue_profiling>() {
  return true;
}

// No built-in kernels are exposed by the host device.
template <>
inline std::vector<kernel_id>
get_device_info_host<info::device::built_in_kernel_ids>() {
  return {};
}

template <>
inline std::vector<std::string>
get_device_info_host<info::device::built_in_kernels>() {
  return {};
}

template <> inline platform get_device_info_host<info::device::platform>() {
  return createSyclObjFromImpl<platform>(platform_impl::getHostPlatformImpl());
}

template <> inline std::string get_device_info_host<info::device::name>() {
  return "SYCL host device";
}

template <> inline std::string get_device_info_host<info::device::vendor>() {
  return "";
}

template <>
inline std::string get_device_info_host<info::device::driver_version>() {
  return "1.2";
}

template <> inline std::string get_device_info_host<info::device::profile>() {
  return "FULL PROFILE";
}

template <> inline std::string get_device_info_host<info::device::version>() {
  return "1.2";
}

// OpenCL C is not involved when executing on the host device.
template <>
inline std::string get_device_info_host<info::device::opencl_c_version>() {
  return "not applicable";
}

template <>
inline std::vector<std::string>
get_device_info_host<info::device::extensions>() {
  // TODO update when appropriate
  return {};
}

template <>
inline size_t get_device_info_host<info::device::printf_buffer_size>() {
  // current value is the required minimum (1 MiB)
  return 1024 * 1024;
}

template <>
inline bool get_device_info_host<info::device::preferred_interop_user_sync>() {
  return false;
}

// The host device is never a sub-device, so it has no parent.
template <> inline device get_device_info_host<info::device::parent_device>() {
  throw invalid_object_error(
      "Partitioning to subdevices of the host device is not implemented",
      PI_ERROR_INVALID_DEVICE);
}

template <>
inline uint32_t
get_device_info_host<info::device::partition_max_sub_devices>() {
  // TODO update once subdevice creation is enabled
  return 1;
}

template <>
inline std::vector<info::partition_property>
get_device_info_host<info::device::partition_properties>() {
  // TODO update once subdevice creation is enabled
  return {};
}

template <>
inline std::vector<info::partition_affinity_domain>
get_device_info_host<info::device::partition_affinity_domains>() {
  // TODO update once subdevice creation is enabled
  return {};
}
1876 
1877 template <>
1879 get_device_info_host<info::device::partition_type_property>() {
1881 }
1882 
1883 template <>
1885 get_device_info_host<info::device::partition_type_affinity_domain>() {
1886  // TODO update once subdevice creation is enabled
1888 }
1889 
1890 template <>
1891 inline uint32_t get_device_info_host<info::device::reference_count>() {
1892  // TODO update once subdevice creation is enabled
1893  return 1;
1894 }
1895 
1896 template <>
1897 inline uint32_t get_device_info_host<info::device::max_num_sub_groups>() {
1898  // TODO update once subgroups are enabled
1899  throw runtime_error("Sub-group feature is not supported on HOST device.",
1900  PI_ERROR_INVALID_DEVICE);
1901 }
1902 
1903 template <>
1904 inline std::vector<size_t>
1905 get_device_info_host<info::device::sub_group_sizes>() {
1906  // TODO update once subgroups are enabled
1907  throw runtime_error("Sub-group feature is not supported on HOST device.",
1908  PI_ERROR_INVALID_DEVICE);
1909 }
1910 
1911 template <>
1912 inline bool
1913 get_device_info_host<info::device::sub_group_independent_forward_progress>() {
1914  // TODO update once subgroups are enabled
1915  throw runtime_error("Sub-group feature is not supported on HOST device.",
1916  PI_ERROR_INVALID_DEVICE);
1917 }
1918 
1919 template <>
1920 inline bool get_device_info_host<info::device::kernel_kernel_pipe_support>() {
1921  return false;
1922 }
1923 
1924 template <>
1925 inline std::string get_device_info_host<info::device::backend_version>() {
1926  throw runtime_error(
1927  "Backend version feature is not supported on HOST device.",
1928  PI_ERROR_INVALID_DEVICE);
1929 }
1930 
1931 template <>
1932 inline bool get_device_info_host<info::device::usm_device_allocations>() {
1933  return true;
1934 }
1935 
1936 template <>
1937 inline bool get_device_info_host<info::device::usm_host_allocations>() {
1938  return true;
1939 }
1940 
1941 template <>
1942 inline bool get_device_info_host<info::device::usm_shared_allocations>() {
1943  return true;
1944 }
1945 
1946 template <>
1947 inline bool
1948 get_device_info_host<info::device::usm_restricted_shared_allocations>() {
1949  return true;
1950 }
1951 
1952 template <>
1953 inline bool get_device_info_host<info::device::usm_system_allocations>() {
1954  return true;
1955 }
1956 
1957 template <>
1958 inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
1959  return false;
1960 }
1961 
1962 // Specializations for intel extensions for Level Zero low-level
1963 // detail device descriptors (not support on host).
1964 template <>
1965 inline uint32_t get_device_info_host<ext::intel::info::device::device_id>() {
1966  throw runtime_error("Obtaining the device ID is not supported on HOST device",
1967  PI_ERROR_INVALID_DEVICE);
1968 }
1969 template <>
1970 inline std::string
1971 get_device_info_host<ext::intel::info::device::pci_address>() {
1972  throw runtime_error(
1973  "Obtaining the PCI address is not supported on HOST device",
1974  PI_ERROR_INVALID_DEVICE);
1975 }
1976 template <>
1977 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_eu_count>() {
1978  throw runtime_error("Obtaining the EU count is not supported on HOST device",
1979  PI_ERROR_INVALID_DEVICE);
1980 }
1981 template <>
1982 inline uint32_t
1983 get_device_info_host<ext::intel::info::device::gpu_eu_simd_width>() {
1984  throw runtime_error(
1985  "Obtaining the EU SIMD width is not supported on HOST device",
1986  PI_ERROR_INVALID_DEVICE);
1987 }
1988 template <>
1989 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_slices>() {
1990  throw runtime_error(
1991  "Obtaining the number of slices is not supported on HOST device",
1992  PI_ERROR_INVALID_DEVICE);
1993 }
1994 template <>
1995 inline uint32_t
1996 get_device_info_host<ext::intel::info::device::gpu_subslices_per_slice>() {
1997  throw runtime_error("Obtaining the number of subslices per slice is not "
1998  "supported on HOST device",
1999  PI_ERROR_INVALID_DEVICE);
2000 }
2001 template <>
2002 inline uint32_t
2003 get_device_info_host<ext::intel::info::device::gpu_eu_count_per_subslice>() {
2004  throw runtime_error(
2005  "Obtaining the EU count per subslice is not supported on HOST device",
2006  PI_ERROR_INVALID_DEVICE);
2007 }
2008 template <>
2009 inline uint32_t
2010 get_device_info_host<ext::intel::info::device::gpu_hw_threads_per_eu>() {
2011  throw runtime_error(
2012  "Obtaining the HW threads count per EU is not supported on HOST device",
2013  PI_ERROR_INVALID_DEVICE);
2014 }
2015 template <>
2016 inline uint64_t
2017 get_device_info_host<ext::intel::info::device::max_mem_bandwidth>() {
2018  throw runtime_error(
2019  "Obtaining the maximum memory bandwidth is not supported on HOST device",
2020  PI_ERROR_INVALID_DEVICE);
2021 }
2022 template <>
2024 get_device_info_host<ext::intel::info::device::uuid>() {
2025  throw runtime_error(
2026  "Obtaining the device uuid is not supported on HOST device",
2027  PI_ERROR_INVALID_DEVICE);
2028 }
2029 
2030 // TODO: Remove with deprecated feature
2031 // device::get_info<info::device::ext_intel_pci_address>()
2032 template <>
2033 inline std::string get_device_info_host<info::device::ext_intel_pci_address>() {
2034  throw runtime_error(
2035  "Obtaining the PCI address is not supported on HOST device",
2036  PI_ERROR_INVALID_DEVICE);
2037 }
2038 // TODO: Remove with deprecated feature
2039 // device::get_info<info::device::ext_intel_gpu_eu_count>()
2040 template <>
2041 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_eu_count>() {
2042  throw runtime_error("Obtaining the EU count is not supported on HOST device",
2043  PI_ERROR_INVALID_DEVICE);
2044 }
2045 // TODO: Remove with deprecated feature
2046 // device::get_info<info::device::ext_intel_gpu_eu_simd_width>()
2047 template <>
2048 inline uint32_t
2049 get_device_info_host<info::device::ext_intel_gpu_eu_simd_width>() {
2050  throw runtime_error(
2051  "Obtaining the EU SIMD width is not supported on HOST device",
2052  PI_ERROR_INVALID_DEVICE);
2053 }
2054 // TODO: Remove with deprecated feature
2055 // device::get_info<info::device::ext_intel_gpu_slices>()
2056 template <>
2057 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_slices>() {
2058  throw runtime_error(
2059  "Obtaining the number of slices is not supported on HOST device",
2060  PI_ERROR_INVALID_DEVICE);
2061 }
2062 // TODO: Remove with deprecated feature
2063 // device::get_info<info::device::ext_intel_gpu_subslices_per_slice>()
2064 template <>
2065 inline uint32_t
2066 get_device_info_host<info::device::ext_intel_gpu_subslices_per_slice>() {
2067  throw runtime_error("Obtaining the number of subslices per slice is not "
2068  "supported on HOST device",
2069  PI_ERROR_INVALID_DEVICE);
2070 }
2071 // TODO: Remove with deprecated feature
2072 // device::get_info<info::device::ext_intel_gpu_eu_count_per_subslices>()
2073 template <>
2074 inline uint32_t
2075 get_device_info_host<info::device::ext_intel_gpu_eu_count_per_subslice>() {
2076  throw runtime_error(
2077  "Obtaining the EU count per subslice is not supported on HOST device",
2078  PI_ERROR_INVALID_DEVICE);
2079 }
2080 // TODO: Remove with deprecated feature
2081 // device::get_info<info::device::ext_intel_gpu_hw_threads_per_eu>()
2082 template <>
2083 inline uint32_t
2084 get_device_info_host<info::device::ext_intel_gpu_hw_threads_per_eu>() {
2085  throw runtime_error(
2086  "Obtaining the HW threads count per EU is not supported on HOST device",
2087  PI_ERROR_INVALID_DEVICE);
2088 }
2089 // TODO: Remove with deprecated feature
2090 // device::get_info<info::device::ext_intel_max_mem_bandwidth>()
2091 template <>
2092 inline uint64_t
2093 get_device_info_host<info::device::ext_intel_max_mem_bandwidth>() {
2094  throw runtime_error(
2095  "Obtaining the maximum memory bandwidth is not supported on HOST device",
2096  PI_ERROR_INVALID_DEVICE);
2097 }
2098 // TODO:Move to namespace ext::intel::info::device
2099 template <> inline bool get_device_info_host<info::device::ext_oneapi_srgb>() {
2100  return false;
2101 }
2102 
2103 // TODO: Remove with deprecated feature
2104 // device::get_info<info::device::ext_intel_device_info_uuid>()
2105 template <>
2107 get_device_info_host<info::device::ext_intel_device_info_uuid>() {
2108  throw runtime_error(
2109  "Obtaining the device uuid is not supported on HOST device",
2110  PI_ERROR_INVALID_DEVICE);
2111 }
2112 
2113 template <>
2114 inline uint64_t get_device_info_host<ext::intel::info::device::free_memory>() {
2115  throw runtime_error(
2116  "Obtaining the device free memory is not supported on HOST device",
2117  PI_ERROR_INVALID_DEVICE);
2118 }
2119 
2120 template <>
2121 inline uint32_t
2122 get_device_info_host<ext::intel::info::device::memory_clock_rate>() {
2123  throw runtime_error(
2124  "Obtaining the device memory clock rate is not supported on HOST device",
2125  PI_ERROR_INVALID_DEVICE);
2126 }
2127 
2128 template <>
2129 inline uint32_t
2130 get_device_info_host<ext::intel::info::device::memory_bus_width>() {
2131  throw runtime_error(
2132  "Obtaining the device memory bus width is not supported on HOST device",
2133  PI_ERROR_INVALID_DEVICE);
2134 }
2135 
2136 template <>
2137 inline int32_t
2138 get_device_info_host<ext::intel::info::device::max_compute_queue_indices>() {
2139  throw runtime_error(
2140  "Obtaining max compute queue indices is not supported on HOST device",
2141  PI_ERROR_INVALID_DEVICE);
2142 }
2143 
2144 template <>
2146  ext::codeplay::experimental::info::device::supports_fusion>() {
2147  // No support for fusion on the host device.
2148  return false;
2149 }
2150 
2151 template <>
2152 inline uint32_t get_device_info_host<
2153  ext::codeplay::experimental::info::device::max_registers_per_work_group>() {
2154  throw runtime_error("Obtaining the maximum number of available registers per "
2155  "work-group is not supported on HOST device",
2156  PI_ERROR_INVALID_DEVICE);
2157 }
2158 
2159 template <>
2160 inline uint32_t get_device_info_host<
2161  ext::oneapi::experimental::info::device::image_row_pitch_align>() {
2162  throw runtime_error("Obtaining image pitch alignment is not "
2163  "supported on HOST device",
2164  PI_ERROR_INVALID_DEVICE);
2165 }
2166 
2167 template <>
2168 inline uint32_t get_device_info_host<
2169  ext::oneapi::experimental::info::device::max_image_linear_row_pitch>() {
2170  throw runtime_error("Obtaining max image linear pitch is not "
2171  "supported on HOST device",
2172  PI_ERROR_INVALID_DEVICE);
2173 }
2174 
2175 template <>
2176 inline std::vector<ext::oneapi::experimental::matrix::combination>
2178  ext::oneapi::experimental::info::device::matrix_combinations>() {
2179  throw runtime_error("Obtaining matrix combinations is not "
2180  "supported on HOST device",
2181  PI_ERROR_INVALID_DEVICE);
2182 }
2183 
2184 template <>
2185 inline uint32_t get_device_info_host<
2186  ext::oneapi::experimental::info::device::max_image_linear_width>() {
2187  throw runtime_error("Obtaining max image linear width is not "
2188  "supported on HOST device",
2189  PI_ERROR_INVALID_DEVICE);
2190 }
2191 
2192 template <>
2193 inline uint32_t get_device_info_host<
2194  ext::oneapi::experimental::info::device::max_image_linear_height>() {
2195  throw runtime_error("Obtaining max image linear height is not "
2196  "supported on HOST device",
2197  PI_ERROR_INVALID_DEVICE);
2198 }
2199 
2200 template <>
2202  ext::oneapi::experimental::info::device::mipmap_max_anisotropy>() {
2203  throw runtime_error("Bindless image mipmaps are not supported on HOST device",
2204  PI_ERROR_INVALID_DEVICE);
2205 }
2206 
2207 template <>
2208 inline std::vector<sycl::device> get_device_info_host<
2209  ext::oneapi::experimental::info::device::component_devices>() {
2210  throw runtime_error("Host devices cannot be component devices.",
2211  PI_ERROR_INVALID_DEVICE);
2212 }
2213 
2214 template <>
2216  ext::oneapi::experimental::info::device::composite_device>() {
2217  throw runtime_error("Host devices cannot be composite devices.",
2218  PI_ERROR_INVALID_DEVICE);
2219 }
2220 
2221 // Returns the list of all progress guarantees that can be requested for
2222 // work_groups from the coordination level of root_group when using host device.
2223 // First it calls getHostProgressGuarantee to get the strongest guarantee
2224 // available and then calls getProgressGuaranteesUpTo to get a list of all
2225 // guarantees that are either equal to the strongest guarantee or weaker than
2226 // it. The next 5 definitions follow the same model but for different scopes.
2227 template <>
2228 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2232 
2234  using ReturnT =
2235  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2236  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2237  device_impl::getHostProgressGuarantee(execution_scope::work_group,
2238  execution_scope::root_group));
2239 }
2240 
2241 template <>
2242 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2246 
2248  using ReturnT =
2249  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2250  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2251  device_impl::getHostProgressGuarantee(execution_scope::sub_group,
2252  execution_scope::root_group));
2253 }
2254 
2255 template <>
2256 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2258  ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
2261  using ReturnT =
2262  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2263  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2264  device_impl::getHostProgressGuarantee(execution_scope::sub_group,
2265  execution_scope::work_group));
2266 }
2267 
2268 template <>
2269 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2271  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2273 
2275  using ReturnT =
2276  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2277  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2278  device_impl::getHostProgressGuarantee(execution_scope::work_item,
2279  execution_scope::root_group));
2280 }
2281 
2282 template <>
2283 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2285  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2288  using ReturnT =
2289  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2290  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2291  device_impl::getHostProgressGuarantee(execution_scope::work_item,
2292  execution_scope::work_group));
2293 }
2294 
2295 template <>
2296 inline std::vector<ext::oneapi::experimental::forward_progress_guarantee>
2298  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2301  using ReturnT =
2302  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
2303  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2304  device_impl::getHostProgressGuarantee(execution_scope::work_item,
2305  execution_scope::sub_group));
2306 }
2307 
2308 // Returns the list of all progress guarantees that can be requested for
2309 // work_groups from the coordination level of root_group when using the device
2310 // given by Dev. First it calls getProgressGuarantee to get the strongest
2311 // guarantee available and then calls getProgressGuaranteesUpTo to get a list of
2312 // all guarantees that are either equal to the strongest guarantee or weaker
2313 // than it. The next 5 definitions follow the same model but for different
2314 // scopes.
2315 template <typename ReturnT>
2317  ReturnT,
2318  ext::oneapi::experimental::info::device::work_group_progress_capabilities<
2319  ext::oneapi::experimental::execution_scope::root_group>> {
2320  static ReturnT get(const DeviceImplPtr &Dev) {
2322  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2323  Dev->getProgressGuarantee(execution_scope::work_group,
2324  execution_scope::root_group));
2325  }
2326 };
2327 template <typename ReturnT>
2329  ReturnT,
2330  ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
2331  ext::oneapi::experimental::execution_scope::root_group>> {
2332  static ReturnT get(const DeviceImplPtr &Dev) {
2334  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2335  Dev->getProgressGuarantee(execution_scope::sub_group,
2336  execution_scope::root_group));
2337  }
2338 };
2339 
2340 template <typename ReturnT>
2342  ReturnT,
2343  ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
2344  ext::oneapi::experimental::execution_scope::work_group>> {
2345  static ReturnT get(const DeviceImplPtr &Dev) {
2346 
2348  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2349  Dev->getProgressGuarantee(execution_scope::sub_group,
2350  execution_scope::work_group));
2351  }
2352 };
2353 
2354 template <typename ReturnT>
2356  ReturnT,
2357  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2358  ext::oneapi::experimental::execution_scope::root_group>> {
2359  static ReturnT get(const DeviceImplPtr &Dev) {
2360 
2362  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2363  Dev->getProgressGuarantee(execution_scope::work_item,
2364  execution_scope::root_group));
2365  }
2366 };
2367 template <typename ReturnT>
2369  ReturnT,
2370  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2371  ext::oneapi::experimental::execution_scope::work_group>> {
2372  static ReturnT get(const DeviceImplPtr &Dev) {
2373 
2375  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2376  Dev->getProgressGuarantee(execution_scope::work_item,
2377  execution_scope::work_group));
2378  }
2379 };
2380 
2381 template <typename ReturnT>
2383  ReturnT,
2384  ext::oneapi::experimental::info::device::work_item_progress_capabilities<
2385  ext::oneapi::experimental::execution_scope::sub_group>> {
2386  static ReturnT get(const DeviceImplPtr &Dev) {
2387 
2389  return device_impl::getProgressGuaranteesUpTo<ReturnT>(
2390  Dev->getProgressGuarantee(execution_scope::work_item,
2391  execution_scope::sub_group));
2392  }
2393 };
2394 
2395 } // namespace detail
2396 } // namespace _V1
2397 } // namespace sycl
static size_t getOSMemSize()
Returns the amount of RAM available for the operating system.
Definition: os_util.cpp:203
static ProgramManager & getInstance()
std::string get_device_info_string(sycl::detail::pi::PiDeviceInfo InfoCode) const
Get device info string.
const PluginPtr & getPlugin() const
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
Definition: device_impl.hpp:67
static sycl::ext::oneapi::experimental::forward_progress_guarantee getHostProgressGuarantee(sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope)
static jit_compiler & get_instance()
static std::shared_ptr< platform_impl > getHostPlatformImpl()
Static functions that help maintain platform uniquess and equality of comparison.
static std::shared_ptr< platform_impl > getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, const PluginPtr &Plugin)
Queries the cache to see if the specified PiPlatform has been seen before.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
const std::error_code & code() const noexcept
Definition: exception.cpp:69
A unique identifier of an item in an index space.
Definition: id.hpp:36
Encapsulates a SYCL platform on which kernels may be executed.
Definition: platform.hpp:99
detail::is_platform_info_desc< Param >::return_type get_info() const
Queries this SYCL platform for info.
Definition: platform.hpp:184
#define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN)
::pi_device PiDevice
Definition: pi.hpp:114
::pi_platform PiPlatform
Definition: pi.hpp:112
std::vector< memory_scope > readMemoryScopeBitfield(pi_memory_scope_capabilities bits)
range< 2 > construct_range< 2 >(size_t *values)
std::vector< info::fp_config > read_fp_bitfield(pi_device_fp_config bits)
Definition: device_info.hpp:37
range< Dimensions > construct_range(size_t *values)=delete
constexpr std::pair< const int, oneapi_exp_arch > IntelGPUArchitectures[]
constexpr std::pair< const int, oneapi_exp_arch > IntelCPUArchitectures[]
range< 3 > construct_range< 3 >(size_t *values)
std::string affinityDomainToString(info::partition_affinity_domain AffinityDomain)
Definition: device_info.hpp:87
static bool is_sycl_partition_property(info::partition_property PP)
Param::return_type get_device_info_host()=delete
std::vector< info::partition_affinity_domain > read_domain_bitfield(pi_device_affinity_domain bits)
Definition: device_info.hpp:59
std::vector< std::string > split_string(std::string_view str, char delimeter)
std::shared_ptr< device_impl > DeviceImplPtr
Param::return_type get_device_info(const DeviceImplPtr &Dev)
constexpr std::pair< const char *, oneapi_exp_arch > NvidiaAmdGPUArchitectures[]
std::vector< memory_order > readMemoryOrderBitfield(pi_memory_order_capabilities bits)
std::array< unsigned char, 16 > uuid_type
std::vector< info::execution_capability > read_execution_bitfield(pi_device_exec_capabilities bits)
Definition: device_info.hpp:77
range< 1 > construct_range< 1 >(size_t *values)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
auto autodecltype(a) b
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:93
Definition: access.hpp:18
static constexpr pi_device_fp_config PI_FP_SOFT_FLOAT
Definition: pi.h:930
pi_bitfield pi_device_exec_capabilities
Definition: pi.h:760
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE
Definition: pi.h:920
_pi_result
Definition: pi.h:250
pi_uint32 pi_bool
Definition: pi.h:241
_pi_usm_capabilities pi_usm_capabilities
Definition: pi.h:2080
_pi_device_info
Definition: pi.h:330
@ PI_DEVICE_INFO_PARTITION_TYPE
Definition: pi.h:406
static constexpr pi_device_fp_config PI_FP_DENORM
Definition: pi.h:924
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE
Definition: pi.h:915
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE
Definition: pi.h:913
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:78
_pi_usm_capabilities
Definition: pi.h:2051
@ PI_USM_ACCESS
Definition: pi.h:2052
@ PI_USM_CONCURRENT_ACCESS
Definition: pi.h:2054
pi_bitfield pi_queue_properties
Definition: pi.h:835
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE
Definition: pi.h:911
static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST
Definition: pi.h:926
static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF
Definition: pi.h:928
_pi_device * pi_device
Definition: pi.h:1245
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
Definition: pi.h:840
static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
Definition: pi.h:931
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE
Definition: pi.h:917
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA
Definition: pi.h:909
pi_bitfield pi_memory_scope_capabilities
Definition: pi.h:782
pi_bitfield pi_device_fp_config
Definition: pi.h:923
static constexpr pi_device_exec_capabilities PI_EXEC_NATIVE_KERNEL
Definition: pi.h:937
static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO
Definition: pi.h:927
static constexpr pi_device_fp_config PI_FP_FMA
Definition: pi.h:929
static constexpr pi_device_exec_capabilities PI_EXEC_KERNEL
Definition: pi.h:936
static constexpr pi_device_fp_config PI_FP_INF_NAN
Definition: pi.h:925
pi_bitfield pi_device_affinity_domain
Definition: pi.h:908
pi_bitfield pi_memory_order_capabilities
Definition: pi.h:775
C++ wrapper of extern "C" PI interfaces.
static uint32_t getMemCacheLineSize()
static uint32_t getNativeVectorWidth(TypeIndex Index)
Returns the maximum vector width counted in elements of the given type.
static uint32_t getMaxClockFrequency()
static platform get(const DeviceImplPtr &Dev)
static std::string get(const DeviceImplPtr &Dev)
static std::vector< info::fp_config > get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)