DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_impl.cpp
Go to the documentation of this file.
1 //==----------------- device_impl.cpp - SYCL device ------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/device_impl.hpp>
10 #include <detail/device_info.hpp>
11 #include <detail/platform_impl.hpp>
12 #include <sycl/detail/ur.hpp>
13 #include <sycl/device.hpp>
14 
15 #include <algorithm>
16 
17 namespace sycl {
18 inline namespace _V1 {
19 namespace detail {
20 
21 device_impl::device_impl(ur_native_handle_t InteropDeviceHandle,
22  const PluginPtr &Plugin)
23  : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {}
24 
27 device_impl::device_impl(ur_device_handle_t Device, PlatformImplPtr Platform)
28  : device_impl(0, Device, Platform, Platform->getPlugin()) {}
29 
32 device_impl::device_impl(ur_device_handle_t Device, const PluginPtr &Plugin)
33  : device_impl(0, Device, nullptr, Plugin) {}
34 
35 device_impl::device_impl(ur_native_handle_t InteropDeviceHandle,
36  ur_device_handle_t Device, PlatformImplPtr Platform,
37  const PluginPtr &Plugin)
38  : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) {
39  bool InteroperabilityConstructor = false;
40  if (Device == nullptr) {
41  assert(InteropDeviceHandle);
42  // Get UR device from the raw device handle.
43  // NOTE: this is for OpenCL interop only (and should go away).
44  // With SYCL-2020 BE generalization "make" functions are used instead.
45  Plugin->call<UrApiKind::urDeviceCreateWithNativeHandle>(
46  InteropDeviceHandle, Plugin->getUrAdapter(), nullptr, &MDevice);
47  InteroperabilityConstructor = true;
48  }
49 
50  // TODO catch an exception and put it to list of asynchronous exceptions
51  Plugin->call<UrApiKind::urDeviceGetInfo>(
52  MDevice, UR_DEVICE_INFO_TYPE, sizeof(ur_device_type_t), &MType, nullptr);
53 
54  // No need to set MRootDevice when MAlwaysRootDevice is true
55  if ((Platform == nullptr) || !Platform->MAlwaysRootDevice) {
56  // TODO catch an exception and put it to list of asynchronous exceptions
57  Plugin->call<UrApiKind::urDeviceGetInfo>(
58  MDevice, UR_DEVICE_INFO_PARENT_DEVICE, sizeof(ur_device_handle_t),
59  &MRootDevice, nullptr);
60  }
61 
62  if (!InteroperabilityConstructor) {
63  // TODO catch an exception and put it to list of asynchronous exceptions
64  // Interoperability Constructor already calls DeviceRetain in
65  // urDeviceCreateWithNativeHandle.
66  Plugin->call<UrApiKind::urDeviceRetain>(MDevice);
67  }
68 
69  // set MPlatform
70  if (!Platform) {
71  Platform = platform_impl::getPlatformFromUrDevice(MDevice, Plugin);
72  }
73  MPlatform = Platform;
74 
75  MIsAssertFailSupported =
77 }
78 
80  try {
81  // TODO catch an exception and put it to list of asynchronous exceptions
82  const PluginPtr &Plugin = getPlugin();
83  ur_result_t Err = Plugin->call_nocheck<UrApiKind::urDeviceRelease>(MDevice);
85  } catch (std::exception &e) {
86  __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_impl", e);
87  }
88 }
89 
91  info::partition_affinity_domain AffinityDomain) const {
92  auto SupportedDomains = get_info<info::device::partition_affinity_domains>();
93  return std::find(SupportedDomains.begin(), SupportedDomains.end(),
94  AffinityDomain) != SupportedDomains.end();
95 }
96 
97 cl_device_id device_impl::get() const {
98  // TODO catch an exception and put it to list of asynchronous exceptions
99  getPlugin()->call<UrApiKind::urDeviceRetain>(MDevice);
100  return ur::cast<cl_device_id>(getNative());
101 }
102 
104  return createSyclObjFromImpl<platform>(MPlatform);
105 }
106 
107 template <typename Param>
108 typename Param::return_type device_impl::get_info() const {
109  return get_device_info<Param>(
110  MPlatform->getOrMakeDeviceImpl(MDevice, MPlatform));
111 }
112 // Explicitly instantiate all device info traits
113 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \
114  template ReturnT device_impl::get_info<info::device::Desc>() const;
115 
116 #define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) \
117  template ReturnT device_impl::get_info<info::device::Desc>() const;
118 
119 #include <sycl/info/device_traits.def>
120 #undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED
121 #undef __SYCL_PARAM_TRAITS_SPEC
122 
123 #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \
124  template __SYCL_EXPORT ReturnT \
125  device_impl::get_info<Namespace::info::DescType::Desc>() const;
126 
127 #include <sycl/info/ext_codeplay_device_traits.def>
128 #include <sycl/info/ext_intel_device_traits.def>
129 #include <sycl/info/ext_oneapi_device_traits.def>
130 #undef __SYCL_PARAM_TRAITS_SPEC
131 
132 template <>
133 typename info::platform::version::return_type
134 device_impl::get_backend_info<info::platform::version>() const {
135  if (getBackend() != backend::opencl) {
137  "the info::platform::version info descriptor can "
138  "only be queried with an OpenCL backend");
139  }
140  return get_platform().get_info<info::platform::version>();
141 }
142 
143 template <>
144 typename info::device::version::return_type
145 device_impl::get_backend_info<info::device::version>() const {
146  if (getBackend() != backend::opencl) {
148  "the info::device::version info descriptor can only "
149  "be queried with an OpenCL backend");
150  }
151  return get_info<info::device::version>();
152 }
153 
154 template <>
155 typename info::device::backend_version::return_type
156 device_impl::get_backend_info<info::device::backend_version>() const {
157  if (getBackend() != backend::ext_oneapi_level_zero) {
159  "the info::device::backend_version info descriptor "
160  "can only be queried with a Level Zero backend");
161  }
162  return "";
163  // Currently The Level Zero backend does not define the value of this
164  // information descriptor and implementations are encouraged to return the
165  // empty string as per specification.
166 }
167 
168 bool device_impl::has_extension(const std::string &ExtensionName) const {
169  std::string AllExtensionNames =
170  get_device_info_string(UR_DEVICE_INFO_EXTENSIONS);
171  return (AllExtensionNames.find(ExtensionName) != std::string::npos);
172 }
173 
175  auto SupportedProperties = get_info<info::device::partition_properties>();
176  return std::find(SupportedProperties.begin(), SupportedProperties.end(),
177  Prop) != SupportedProperties.end();
178 }
179 
181  const ur_device_partition_properties_t *Properties,
182  size_t SubDevicesCount) const {
183  std::vector<ur_device_handle_t> SubDevices(SubDevicesCount);
184  uint32_t ReturnedSubDevices = 0;
185  const PluginPtr &Plugin = getPlugin();
186  Plugin->call<sycl::errc::invalid, UrApiKind::urDevicePartition>(
187  MDevice, Properties, SubDevicesCount, SubDevices.data(),
188  &ReturnedSubDevices);
189  if (ReturnedSubDevices != SubDevicesCount) {
190  throw sycl::exception(
192  "Could not partition to the specified number of sub-devices");
193  }
194  // TODO: Need to describe the subdevice model. Some sub_device management
195  // may be necessary. What happens if create_sub_devices is called multiple
196  // times with the same arguments?
197  //
198  std::vector<device> res;
199  std::for_each(SubDevices.begin(), SubDevices.end(),
200  [&res, this](const ur_device_handle_t &a_ur_device) {
201  device sycl_device = detail::createSyclObjFromImpl<device>(
202  MPlatform->getOrMakeDeviceImpl(a_ur_device, MPlatform));
203  res.push_back(sycl_device);
204  });
205  return res;
206 }
207 
208 std::vector<device> device_impl::create_sub_devices(size_t ComputeUnits) const {
211  "Device does not support "
212  "sycl::info::partition_property::partition_equally.");
213  }
214  // If count exceeds the total number of compute units in the device, an
215  // exception with the errc::invalid error code must be thrown.
216  auto MaxComputeUnits = get_info<info::device::max_compute_units>();
217  if (ComputeUnits > MaxComputeUnits)
219  "Total counts exceed max compute units");
220 
221  size_t SubDevicesCount = MaxComputeUnits / ComputeUnits;
222 
223  ur_device_partition_property_t Prop{};
224  Prop.type = UR_DEVICE_PARTITION_EQUALLY;
225  Prop.value.count = static_cast<uint32_t>(ComputeUnits);
226 
227  ur_device_partition_properties_t Properties{};
228  Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES;
229  Properties.PropCount = 1;
230  Properties.pProperties = &Prop;
231 
232  return create_sub_devices(&Properties, SubDevicesCount);
233 }
234 
235 std::vector<device>
236 device_impl::create_sub_devices(const std::vector<size_t> &Counts) const {
238  throw sycl::exception(
240  "Device does not support "
241  "sycl::info::partition_property::partition_by_counts.");
242  }
243 
244  std::vector<ur_device_partition_property_t> Props{};
245 
246  // Fill the properties vector with counts and validate it
247  size_t TotalCounts = 0;
248  size_t NonZeroCounts = 0;
249  for (auto Count : Counts) {
250  TotalCounts += Count;
251  NonZeroCounts += (Count != 0) ? 1 : 0;
252  Props.push_back(ur_device_partition_property_t{
253  UR_DEVICE_PARTITION_BY_COUNTS, {static_cast<uint32_t>(Count)}});
254  }
255 
256  ur_device_partition_properties_t Properties{};
257  Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES;
258  Properties.pProperties = Props.data();
259  Properties.PropCount = Props.size();
260 
261  // If the number of non-zero values in counts exceeds the device’s maximum
262  // number of sub devices (as returned by info::device::
263  // partition_max_sub_devices) an exception with the errc::invalid
264  // error code must be thrown.
265  if (NonZeroCounts > get_info<info::device::partition_max_sub_devices>())
267  "Total non-zero counts exceed max sub-devices");
268 
269  // If the total of all the values in the counts vector exceeds the total
270  // number of compute units in the device (as returned by
271  // info::device::max_compute_units), an exception with the errc::invalid
272  // error code must be thrown.
273  if (TotalCounts > get_info<info::device::max_compute_units>())
275  "Total counts exceed max compute units");
276 
277  return create_sub_devices(&Properties, Counts.size());
278 }
279 
281  info::partition_affinity_domain AffinityDomain) const {
284  throw sycl::exception(
286  "Device does not support "
287  "sycl::info::partition_property::partition_by_affinity_domain.");
288  }
289  if (!is_affinity_supported(AffinityDomain)) {
291  "Device does not support " +
292  affinityDomainToString(AffinityDomain) + ".");
293  }
294 
295  ur_device_partition_property_t Prop;
296  Prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
297  Prop.value.affinity_domain =
298  static_cast<ur_device_affinity_domain_flags_t>(AffinityDomain);
299 
300  ur_device_partition_properties_t Properties{};
301  Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES;
302  Properties.PropCount = 1;
303  Properties.pProperties = &Prop;
304 
305  uint32_t SubDevicesCount = 0;
306  const PluginPtr &Plugin = getPlugin();
307  Plugin->call<sycl::errc::invalid, UrApiKind::urDevicePartition>(
308  MDevice, &Properties, 0, nullptr, &SubDevicesCount);
309 
310  return create_sub_devices(&Properties, SubDevicesCount);
311 }
312 
313 std::vector<device> device_impl::create_sub_devices() const {
316  throw sycl::exception(
318  "Device does not support "
319  "sycl::info::partition_property::ext_intel_partition_by_cslice.");
320  }
321 
322  ur_device_partition_property_t Prop;
323  Prop.type = UR_DEVICE_PARTITION_BY_CSLICE;
324 
325  ur_device_partition_properties_t Properties{};
326  Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES;
327  Properties.pProperties = &Prop;
328  Properties.PropCount = 1;
329 
330  uint32_t SubDevicesCount = 0;
331  const PluginPtr &Plugin = getPlugin();
332  Plugin->call<UrApiKind::urDevicePartition>(MDevice, &Properties, 0, nullptr,
333  &SubDevicesCount);
334 
335  return create_sub_devices(&Properties, SubDevicesCount);
336 }
337 
338 ur_native_handle_t device_impl::getNative() const {
339  auto Plugin = getPlugin();
340  if (getBackend() == backend::opencl)
341  Plugin->call<UrApiKind::urDeviceRetain>(getHandleRef());
342  ur_native_handle_t Handle;
343  Plugin->call<UrApiKind::urDeviceGetNativeHandle>(getHandleRef(), &Handle);
344  return Handle;
345 }
346 
347 bool device_impl::has(aspect Aspect) const {
348  size_t return_size = 0;
349 
350  switch (Aspect) {
351  case aspect::host:
352  // Deprecated
353  return false;
354  case aspect::cpu:
355  return is_cpu();
356  case aspect::gpu:
357  return is_gpu();
358  case aspect::accelerator:
359  return is_accelerator();
360  case aspect::custom:
361  return false;
362  // TODO: Implement this for FPGA emulator.
363  case aspect::emulated:
364  return false;
365  case aspect::host_debuggable:
366  return false;
367  case aspect::fp16:
368  return has_extension("cl_khr_fp16");
369  case aspect::fp64:
370  return has_extension("cl_khr_fp64");
371  case aspect::int64_base_atomics:
372  return has_extension("cl_khr_int64_base_atomics");
373  case aspect::int64_extended_atomics:
374  return has_extension("cl_khr_int64_extended_atomics");
375  case aspect::atomic64:
376  return get_info<info::device::atomic64>();
377  case aspect::image:
378  return get_info<info::device::image_support>();
379  case aspect::online_compiler:
380  return get_info<info::device::is_compiler_available>();
381  case aspect::online_linker:
382  return get_info<info::device::is_linker_available>();
383  case aspect::queue_profiling:
384  return get_info<info::device::queue_profiling>();
385  case aspect::usm_device_allocations:
386  return get_info<info::device::usm_device_allocations>();
387  case aspect::usm_host_allocations:
388  return get_info<info::device::usm_host_allocations>();
389  case aspect::ext_intel_mem_channel:
390  return get_info<info::device::ext_intel_mem_channel>();
391  case aspect::ext_oneapi_cuda_cluster_group:
392  return get_info<info::device::ext_oneapi_cuda_cluster_group>();
393  case aspect::usm_atomic_host_allocations:
394  return (get_device_info_impl<ur_device_usm_access_capability_flags_t,
395  info::device::usm_host_allocations>::
396  get(MPlatform->getDeviceImpl(MDevice)) &
397  UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
398  case aspect::usm_shared_allocations:
399  return get_info<info::device::usm_shared_allocations>();
400  case aspect::usm_atomic_shared_allocations:
401  return (get_device_info_impl<ur_device_usm_access_capability_flags_t,
402  info::device::usm_shared_allocations>::
403  get(MPlatform->getDeviceImpl(MDevice)) &
404  UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
405  case aspect::usm_restricted_shared_allocations:
406  return get_info<info::device::usm_restricted_shared_allocations>();
407  case aspect::usm_system_allocations:
408  return get_info<info::device::usm_system_allocations>();
409  case aspect::ext_intel_device_id:
410  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
411  MDevice, UR_DEVICE_INFO_DEVICE_ID, 0, nullptr, &return_size) ==
412  UR_RESULT_SUCCESS;
413  case aspect::ext_intel_pci_address:
414  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
415  MDevice, UR_DEVICE_INFO_PCI_ADDRESS, 0, nullptr, &return_size) ==
416  UR_RESULT_SUCCESS;
417  case aspect::ext_intel_gpu_eu_count:
418  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
419  MDevice, UR_DEVICE_INFO_GPU_EU_COUNT, 0, nullptr,
420  &return_size) == UR_RESULT_SUCCESS;
421  case aspect::ext_intel_gpu_eu_simd_width:
422  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
423  MDevice, UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH, 0, nullptr,
424  &return_size) == UR_RESULT_SUCCESS;
425  case aspect::ext_intel_gpu_slices:
426  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
427  MDevice, UR_DEVICE_INFO_GPU_EU_SLICES, 0, nullptr,
428  &return_size) == UR_RESULT_SUCCESS;
429  case aspect::ext_intel_gpu_subslices_per_slice:
430  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
431  MDevice, UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, 0, nullptr,
432  &return_size) == UR_RESULT_SUCCESS;
433  case aspect::ext_intel_gpu_eu_count_per_subslice:
434  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
435  MDevice, UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, 0, nullptr,
436  &return_size) == UR_RESULT_SUCCESS;
437  case aspect::ext_intel_gpu_hw_threads_per_eu:
438  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
439  MDevice, UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU, 0, nullptr,
440  &return_size) == UR_RESULT_SUCCESS;
441  case aspect::ext_intel_free_memory:
442  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
443  MDevice, UR_DEVICE_INFO_GLOBAL_MEM_FREE, 0, nullptr,
444  &return_size) == UR_RESULT_SUCCESS;
445  case aspect::ext_intel_memory_clock_rate:
446  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
447  MDevice, UR_DEVICE_INFO_MEMORY_CLOCK_RATE, 0, nullptr,
448  &return_size) == UR_RESULT_SUCCESS;
449  case aspect::ext_intel_memory_bus_width:
450  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
451  MDevice, UR_DEVICE_INFO_MEMORY_BUS_WIDTH, 0, nullptr,
452  &return_size) == UR_RESULT_SUCCESS;
453  case aspect::ext_intel_device_info_uuid: {
454  auto Result = getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
455  MDevice, UR_DEVICE_INFO_UUID, 0, nullptr, &return_size);
456  if (Result != UR_RESULT_SUCCESS) {
457  return false;
458  }
459 
460  assert(return_size <= 16);
461  unsigned char UUID[16];
462 
463  return getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
464  MDevice, UR_DEVICE_INFO_UUID, 16 * sizeof(unsigned char), UUID,
465  nullptr) == UR_RESULT_SUCCESS;
466  }
467  case aspect::ext_intel_max_mem_bandwidth:
468  // currently not supported
469  return false;
470  case aspect::ext_oneapi_srgb:
471  return get_info<info::device::ext_oneapi_srgb>();
472  case aspect::ext_oneapi_native_assert:
473  return isAssertFailSupported();
474  case aspect::ext_oneapi_cuda_async_barrier: {
475  int async_barrier_supported;
476  bool call_successful =
477  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
478  MDevice, UR_DEVICE_INFO_ASYNC_BARRIER, sizeof(int),
479  &async_barrier_supported, nullptr) == UR_RESULT_SUCCESS;
480  return call_successful && async_barrier_supported;
481  }
482  case aspect::ext_intel_legacy_image: {
483  ur_bool_t legacy_image_support = false;
484  bool call_successful =
485  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
486  MDevice, UR_DEVICE_INFO_IMAGE_SUPPORTED, sizeof(ur_bool_t),
487  &legacy_image_support, nullptr) == UR_RESULT_SUCCESS;
488  return call_successful && legacy_image_support;
489  }
490  case aspect::ext_oneapi_bindless_images: {
491  ur_bool_t support = false;
492  bool call_successful =
493  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
494  MDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP,
495  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
496  return call_successful && support;
497  }
498  case aspect::ext_oneapi_bindless_images_shared_usm: {
499  ur_bool_t support = false;
500  bool call_successful =
501  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
502  MDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP,
503  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
504  return call_successful && support;
505  }
506  case aspect::ext_oneapi_bindless_images_1d_usm: {
507  ur_bool_t support = false;
508  bool call_successful =
509  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
510  MDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP,
511  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
512  return call_successful && support;
513  }
514  case aspect::ext_oneapi_bindless_images_2d_usm: {
515  ur_bool_t support = false;
516  bool call_successful =
517  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
518  MDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP,
519  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
520  return call_successful && support;
521  }
522  case aspect::ext_oneapi_external_memory_import: {
523  ur_bool_t support = false;
524  bool call_successful =
525  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
526  MDevice, UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP,
527  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
528  return call_successful && support;
529  }
530  case aspect::ext_oneapi_external_semaphore_import: {
531  ur_bool_t support = false;
532  bool call_successful =
533  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
534  MDevice, UR_DEVICE_INFO_EXTERNAL_SEMAPHORE_IMPORT_SUPPORT_EXP,
535  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
536  return call_successful && support;
537  }
538  case aspect::ext_oneapi_mipmap: {
539  ur_bool_t support = false;
540  bool call_successful =
541  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
542  MDevice, UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, sizeof(ur_bool_t),
543  &support, nullptr) == UR_RESULT_SUCCESS;
544  return call_successful && support;
545  }
546  case aspect::ext_oneapi_mipmap_anisotropy: {
547  ur_bool_t support = false;
548  bool call_successful =
549  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
550  MDevice, UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP,
551  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
552  return call_successful && support;
553  }
554  case aspect::ext_oneapi_mipmap_level_reference: {
555  ur_bool_t support = false;
556  bool call_successful =
557  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
558  MDevice, UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP,
559  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
560  return call_successful && support;
561  }
562  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
563  ur_bool_t support = false;
564  bool call_successful =
565  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
566  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP,
567  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
568  return call_successful && support;
569  }
570  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
571  ur_bool_t support = false;
572  bool call_successful =
573  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
574  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP,
575  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
576  return call_successful && support;
577  }
578  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
579  ur_bool_t support = false;
580  bool call_successful =
581  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
582  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP,
583  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
584  return call_successful && support;
585  }
586  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
587  ur_bool_t support = false;
588  bool call_successful =
589  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
590  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP,
591  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
592  return call_successful && support;
593  }
594  case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
595  ur_bool_t support = false;
596  bool call_successful =
597  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
598  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP,
599  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
600  return call_successful && support;
601  }
602  case aspect::ext_oneapi_cubemap: {
603  ur_bool_t support = false;
604  bool call_successful =
605  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
606  MDevice, UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP, sizeof(ur_bool_t),
607  &support, nullptr) == UR_RESULT_SUCCESS;
608  return call_successful && support;
609  }
610  case aspect::ext_oneapi_cubemap_seamless_filtering: {
611  ur_bool_t support = false;
612  bool call_successful =
613  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
614  MDevice, UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP,
615  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
616  return call_successful && support;
617  }
618  case aspect::ext_oneapi_image_array: {
619  ur_bool_t support = false;
620  bool call_successful =
621  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
622  MDevice, UR_DEVICE_INFO_IMAGE_ARRAY_SUPPORT_EXP, sizeof(ur_bool_t),
623  &support, nullptr) == UR_RESULT_SUCCESS;
624  return call_successful && support;
625  }
626  case aspect::ext_oneapi_unique_addressing_per_dim: {
627  ur_bool_t support = false;
628  bool call_successful =
629  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
630  MDevice, UR_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM_EXP,
631  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
632  return call_successful && support;
633  }
634  case aspect::ext_oneapi_bindless_images_sample_1d_usm: {
635  ur_bool_t support = false;
636  bool call_successful =
637  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
638  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP,
639  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
640  return call_successful && support;
641  }
642  case aspect::ext_oneapi_bindless_images_sample_2d_usm: {
643  ur_bool_t support = false;
644  bool call_successful =
645  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
646  MDevice, UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP,
647  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
648  return call_successful && support;
649  }
650  case aspect::ext_intel_esimd: {
651  ur_bool_t support = false;
652  bool call_successful =
653  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
654  MDevice, UR_DEVICE_INFO_ESIMD_SUPPORT, sizeof(ur_bool_t), &support,
655  nullptr) == UR_RESULT_SUCCESS;
656  return call_successful && support;
657  }
658  case aspect::ext_oneapi_ballot_group:
659  case aspect::ext_oneapi_fixed_size_group:
660  case aspect::ext_oneapi_opportunistic_group: {
661  return (this->getBackend() == backend::ext_oneapi_level_zero) ||
662  (this->getBackend() == backend::opencl) ||
664  }
665  case aspect::ext_oneapi_tangle_group: {
666  // TODO: tangle_group is not currently supported for CUDA devices. Add when
667  // implemented.
668  return (this->getBackend() == backend::ext_oneapi_level_zero) ||
669  (this->getBackend() == backend::opencl);
670  }
671  case aspect::ext_intel_matrix: {
673  const std::vector<arch> supported_archs = {
674  arch::intel_cpu_spr, arch::intel_cpu_gnr,
675  arch::intel_gpu_pvc, arch::intel_gpu_dg2_g10,
676  arch::intel_gpu_dg2_g11, arch::intel_gpu_dg2_g12};
677  try {
678  return std::any_of(
679  supported_archs.begin(), supported_archs.end(),
680  [=](const arch a) { return this->extOneapiArchitectureIs(a); });
681  } catch (const sycl::exception &) {
682  // If we're here it means the device does not support architecture
683  // querying
684  return false;
685  }
686  }
687  case aspect::ext_oneapi_is_composite: {
688  auto components = get_info<
689  sycl::ext::oneapi::experimental::info::device::component_devices>();
690  // Any device with ext_oneapi_is_composite aspect will have at least two
691  // constituent component devices.
692  return components.size() >= 2;
693  }
694  case aspect::ext_oneapi_is_component: {
695  typename sycl_to_ur<device>::type Result;
696  bool CallSuccessful =
697  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
698  getHandleRef(),
699  UrInfoCode<ext::oneapi::experimental::info::device::
700  composite_device>::value,
701  sizeof(Result), &Result, nullptr) == UR_RESULT_SUCCESS;
702 
703  return CallSuccessful && Result != nullptr;
704  }
705  case aspect::ext_oneapi_graph: {
706  bool SupportsCommandBufferUpdate = false;
707  bool CallSuccessful =
708  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
709  MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
710  sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
711  nullptr) == UR_RESULT_SUCCESS;
712  if (!CallSuccessful) {
713  return false;
714  }
715 
716  return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
717  }
718  case aspect::ext_oneapi_limited_graph: {
719  bool SupportsCommandBuffers = false;
720  bool CallSuccessful =
721  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
722  MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP,
723  sizeof(SupportsCommandBuffers), &SupportsCommandBuffers,
724  nullptr) == UR_RESULT_SUCCESS;
725  if (!CallSuccessful) {
726  return false;
727  }
728 
729  return SupportsCommandBuffers;
730  }
731  case aspect::ext_oneapi_private_alloca: {
732  // Extension only supported on SPIR-V targets.
733  backend be = getBackend();
734  return be == sycl::backend::ext_oneapi_level_zero ||
735  be == sycl::backend::opencl;
736  }
737  case aspect::ext_oneapi_queue_profiling_tag: {
738  ur_bool_t support = false;
739  bool call_successful =
740  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
741  MDevice, UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP,
742  sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS;
743  return call_successful && support;
744  }
745  case aspect::ext_oneapi_virtual_mem: {
746  ur_bool_t support = false;
747  bool call_successful =
748  getPlugin()->call_nocheck<UrApiKind::urDeviceGetInfo>(
749  MDevice, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, sizeof(ur_bool_t),
750  &support, nullptr) == UR_RESULT_SUCCESS;
751  return call_successful && support;
752  }
753  case aspect::ext_intel_fpga_task_sequence: {
754  return is_accelerator();
755  }
756  case aspect::ext_oneapi_atomic16: {
757  // Likely L0 doesn't check it properly. Need to double-check.
758  return has_extension("cl_ext_float_atomics");
759  }
760  }
761 
762  return false; // This device aspect has not been implemented yet.
763 }
764 
766  return MIsAssertFailSupported;
767 }
768 
769 std::string device_impl::getDeviceName() const {
770  std::call_once(MDeviceNameFlag,
771  [this]() { MDeviceName = get_info<info::device::name>(); });
772 
773  return MDeviceName;
774 }
775 
777  std::call_once(MDeviceArchFlag, [this]() {
778  MDeviceArch =
779  get_info<ext::oneapi::experimental::info::device::architecture>();
780  });
781 
782  return MDeviceArch;
783 }
784 
785 // On the first call this function queries for device timestamp
786 // along with host synchronized timestamp and stores it in member variable
787 // MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
788 // the host timestamp, compute difference against the host timestamp in
789 // MDeviceHostBaseTime and calculate the device timestamp based on the
790 // difference.
791 //
792 // The MDeviceHostBaseTime is refreshed with new device and host timestamp
793 // after a certain interval (determined by TimeTillRefresh) to account for
794 // clock drift between host and device.
795 //
797  using namespace std::chrono;
798  uint64_t HostTime =
799  duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
800  .count();
801 
802  // To account for potential clock drift between host clock and device clock.
803  // The value set is arbitrary: 200 seconds
804  constexpr uint64_t TimeTillRefresh = 200e9;
805  assert(HostTime >= MDeviceHostBaseTime.second);
806  uint64_t Diff = HostTime - MDeviceHostBaseTime.second;
807 
808  // If getCurrentDeviceTime is called for the first time or we have to refresh.
809  if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) {
810  const auto &Plugin = getPlugin();
811  auto Result = Plugin->call_nocheck<UrApiKind::urDeviceGetGlobalTimestamps>(
812  MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second);
813  // We have to remember base host timestamp right after UR call and it is
814  // going to be used for calculation of the device timestamp at the next
815  // getCurrentDeviceTime() call. We need to do it here because getPlugin()
816  // and urDeviceGetGlobalTimestamps calls may take significant amount of time,
817  // for example on the first call to getPlugin plugins may need to be
818  // initialized. If we use timestamp from the beginning of the function then
819  // the difference between host timestamps of the current
820  // getCurrentDeviceTime and the next getCurrentDeviceTime will be incorrect
821  // because it will include execution time of the code before we get device
822  // timestamp from urDeviceGetGlobalTimestamps.
823  HostTime =
824  duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
825  .count();
826  if (Result == UR_RESULT_ERROR_INVALID_OPERATION) {
827  // NOTE(UR port): Removed the call to GetLastError because we shouldn't
828  // be calling it after ERROR_INVALID_OPERATION: there is no
829  // adapter-specific error.
830  throw detail::set_ur_error(
833  "Device and/or backend does not support querying timestamp."),
834  UR_RESULT_ERROR_INVALID_OPERATION);
835  } else {
836  Plugin->checkUrResult<errc::feature_not_supported>(Result);
837  }
838  // Until next sync we will compute device time based on the host time
839  // returned in HostTime, so make this our base host time.
840  MDeviceHostBaseTime.second = HostTime;
841  Diff = 0;
842  }
843  return MDeviceHostBaseTime.first + Diff;
844 }
845 
847  const auto &Plugin = getPlugin();
848  uint64_t DeviceTime = 0, HostTime = 0;
849  auto Result = Plugin->call_nocheck<UrApiKind::urDeviceGetGlobalTimestamps>(
850  MDevice, &DeviceTime, &HostTime);
851  return Result != UR_RESULT_ERROR_INVALID_OPERATION;
852 }
853 
856  try {
859  } catch (sycl::exception &) {
860  return false;
861  }
862 }
863 
864 // Returns the strongest guarantee that can be provided by the host device for
865 // threads created at threadScope from a coordination scope given by
866 // coordinationScope
871  return sycl::ext::oneapi::experimental::forward_progress_guarantee::
872  weakly_parallel;
873 }
874 
875 // Returns the strongest progress guarantee that can be provided by this device
876 // for threads created at threadScope from the coordination scope given by
877 // coordinationScope.
881  ext::oneapi::experimental::execution_scope coordinationScope) const {
885  const int executionScopeSize = 4;
886  (void)coordinationScope;
887  int threadScopeNum = static_cast<int>(threadScope);
888  // we get the immediate progress guarantee that is provided by each scope
889  // between root_group and threadScope and then return the weakest of these.
890  // Counterintuitively, this corresponds to taking the max of the enum values
891  // because of how the forward_progress_guarantee enum values are declared.
892  int guaranteeNum = static_cast<int>(
893  getImmediateProgressGuarantee(execution_scope::root_group));
894  for (int currentScope = executionScopeSize - 2; currentScope > threadScopeNum;
895  --currentScope) {
896  guaranteeNum = std::max(guaranteeNum,
897  static_cast<int>(getImmediateProgressGuarantee(
898  static_cast<execution_scope>(currentScope))));
899  }
900  return static_cast<forward_progress_guarantee>(guaranteeNum);
901 }
902 
906  ext::oneapi::experimental::execution_scope coordinationScope) const {
907  using ReturnT =
908  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
909  auto guarantees = getProgressGuaranteesUpTo<ReturnT>(
910  getProgressGuarantee(threadScope, coordinationScope));
911  return std::find(guarantees.begin(), guarantees.end(), guarantee) !=
912  guarantees.end();
913 }
914 
915 // Returns the progress guarantee provided for a coordination scope
916 // given by coordination_scope for threads created at a scope
917 // immediately below coordination_scope. For example, for root_group
918 // coordination scope it returns the progress guarantee provided
919 // at root_group for threads created at work_group.
922  ext::oneapi::experimental::execution_scope coordination_scope) const {
926  if (is_cpu() && getBackend() == backend::opencl) {
927  switch (coordination_scope) {
928  case execution_scope::root_group:
929  return forward_progress_guarantee::parallel;
930  case execution_scope::work_group:
931  case execution_scope::sub_group:
932  return forward_progress_guarantee::weakly_parallel;
933  default:
934  throw sycl::exception(sycl::errc::invalid,
935  "Work item is not a valid coordination scope!");
936  }
937  } else if (is_gpu() && getBackend() == backend::ext_oneapi_level_zero) {
938  switch (coordination_scope) {
939  case execution_scope::root_group:
940  case execution_scope::work_group:
941  return forward_progress_guarantee::concurrent;
942  case execution_scope::sub_group:
943  return forward_progress_guarantee::weakly_parallel;
944  default:
945  throw sycl::exception(sycl::errc::invalid,
946  "Work item is not a valid coordination scope!");
947  }
948  }
949  return forward_progress_guarantee::weakly_parallel;
950 }
951 
952 } // namespace detail
953 } // namespace _V1
954 } // namespace sycl
ur_native_handle_t getNative() const
Gets the native handle of the SYCL device.
std::vector< device > create_sub_devices() const
Partition device into sub devices.
bool has(aspect Aspect) const
Indicates if the SYCL device has the given feature.
Param::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
platform get_platform() const
Get associated SYCL platform.
bool isGetDeviceAndHostTimerSupported()
Check whether clGetDeviceAndHostTimer is available for fallback profiling.
std::string get_device_info_string(ur_device_info_t InfoCode) const
Get device info string.
device_impl()
Constructs a SYCL device instance as a host device.
bool is_cpu() const
Check if device is a CPU device.
Definition: device_impl.hpp:77
backend getBackend() const
Get the backend of this device.
const PluginPtr & getPlugin() const
bool is_gpu() const
Check if device is a GPU device.
Definition: device_impl.hpp:82
uint64_t getCurrentDeviceTime()
Gets the current device timestamp.
std::string getDeviceName() const
bool has_extension(const std::string &ExtensionName) const
Check SYCL extension support by device.
ur_device_handle_t & getHandleRef()
Get reference to UR device.
Definition: device_impl.hpp:65
static sycl::ext::oneapi::experimental::forward_progress_guarantee getHostProgressGuarantee(sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope)
sycl::ext::oneapi::experimental::forward_progress_guarantee getProgressGuarantee(ext::oneapi::experimental::execution_scope threadScope, ext::oneapi::experimental::execution_scope coordinationScope) const
bool is_affinity_supported(info::partition_affinity_domain AffinityDomain) const
Check if affinity partitioning by specified domain is supported by device.
Definition: device_impl.cpp:90
cl_device_id get() const
Get instance of OpenCL device.
Definition: device_impl.cpp:97
ext::oneapi::experimental::architecture getDeviceArch() const
Get device architecture.
bool supportsForwardProgress(ext::oneapi::experimental::forward_progress_guarantee guarantee, ext::oneapi::experimental::execution_scope threadScope, ext::oneapi::experimental::execution_scope coordinationScope) const
ext::oneapi::experimental::forward_progress_guarantee getImmediateProgressGuarantee(ext::oneapi::experimental::execution_scope coordination_scope) const
bool extOneapiCanCompile(ext::oneapi::experimental::source_language Language)
bool is_partition_supported(info::partition_property Prop) const
Check if desired partition property supported by device.
bool is_accelerator() const
Check if device is an accelerator device.
Definition: device_impl.hpp:87
static std::shared_ptr< platform_impl > getPlatformFromUrDevice(ur_device_handle_t UrDevice, const PluginPtr &Plugin)
Queries the cache for the specified platform based on an input device.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
Encapsulates a SYCL platform on which kernels may be executed.
Definition: platform.hpp:99
#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e)
Definition: common.hpp:364
std::string affinityDomainToString(info::partition_affinity_domain AffinityDomain)
Definition: device_info.hpp:88
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
std::shared_ptr< plugin > PluginPtr
Definition: ur.hpp:107
std::shared_ptr< detail::platform_impl > PlatformImplPtr
exception set_ur_error(exception &&e, int32_t ur_err)
Definition: exception.hpp:157
Function for_each(Group g, Ptr first, Ptr last, Function f)
bool is_source_kernel_bundle_supported(backend BE, source_language Language)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:65
Definition: access.hpp:18
#define __SYCL_CHECK_UR_CODE_NO_EXC(expr)
Definition: plugin.hpp:27
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
C++ utilities for Unified Runtime integration.
#define UR_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT
Extension to denote native support of assert feature by an arbitrary device urDeviceGetInfo call shou...
Definition: ur.hpp:29