DPC++ Runtime
Runtime libraries for oneAPI DPC++
device_impl.cpp
Go to the documentation of this file.
1 //==----------------- device_impl.cpp - SYCL device ------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/device_impl.hpp>
10 #include <detail/device_info.hpp>
11 #include <detail/platform_impl.hpp>
12 #include <sycl/device.hpp>
13 
14 #include <algorithm>
15 
16 namespace sycl {
17 inline namespace _V1 {
18 namespace detail {
19 
21  : MIsHostDevice(true), MPlatform(platform_impl::getHostPlatformImpl()),
22  // assert is natively supported by host
23  MIsAssertFailSupported(true) {}
24 
26  const PluginPtr &Plugin)
27  : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {}
28 
30  PlatformImplPtr Platform)
31  : device_impl(reinterpret_cast<pi_native_handle>(nullptr), Device, Platform,
32  Platform->getPlugin()) {}
33 
35  const PluginPtr &Plugin)
36  : device_impl(reinterpret_cast<pi_native_handle>(nullptr), Device, nullptr,
37  Plugin) {}
38 
39 device_impl::device_impl(pi_native_handle InteropDeviceHandle,
41  PlatformImplPtr Platform, const PluginPtr &Plugin)
42  : MDevice(Device), MIsHostDevice(false),
43  MDeviceHostBaseTime(std::make_pair(0, 0)) {
44 
45  bool InteroperabilityConstructor = false;
46  if (Device == nullptr) {
47  assert(InteropDeviceHandle);
48  // Get PI device from the raw device handle.
49  // NOTE: this is for OpenCL interop only (and should go away).
50  // With SYCL-2020 BE generalization "make" functions are used instead.
52  InteropDeviceHandle, nullptr, &MDevice);
53  InteroperabilityConstructor = true;
54  }
55 
56  // TODO catch an exception and put it to list of asynchronous exceptions
57  Plugin->call<PiApiKind::piDeviceGetInfo>(
59  &MType, nullptr);
60 
61  // No need to set MRootDevice when MAlwaysRootDevice is true
62  if ((Platform == nullptr) || !Platform->MAlwaysRootDevice) {
63  // TODO catch an exception and put it to list of asynchronous exceptions
64  Plugin->call<PiApiKind::piDeviceGetInfo>(
66  sizeof(sycl::detail::pi::PiDevice), &MRootDevice, nullptr);
67  }
68 
69  if (!InteroperabilityConstructor) {
70  // TODO catch an exception and put it to list of asynchronous exceptions
71  // Interoperability Constructor already calls DeviceRetain in
72  // piextDeviceFromNative.
73  Plugin->call<PiApiKind::piDeviceRetain>(MDevice);
74  }
75 
76  // set MPlatform
77  if (!Platform) {
78  Platform = platform_impl::getPlatformFromPiDevice(MDevice, Plugin);
79  }
80  MPlatform = Platform;
81 
82  MIsAssertFailSupported =
84 }
85 
87  if (!MIsHostDevice) {
88  // TODO catch an exception and put it to list of asynchronous exceptions
89  const PluginPtr &Plugin = getPlugin();
91  Plugin->call_nocheck<PiApiKind::piDeviceRelease>(MDevice);
93  }
94 }
95 
97  info::partition_affinity_domain AffinityDomain) const {
98  auto SupportedDomains = get_info<info::device::partition_affinity_domains>();
99  return std::find(SupportedDomains.begin(), SupportedDomains.end(),
100  AffinityDomain) != SupportedDomains.end();
101 }
102 
103 cl_device_id device_impl::get() const {
104  if (MIsHostDevice) {
105  throw invalid_object_error(
106  "This instance of device doesn't support OpenCL interoperability.",
107  PI_ERROR_INVALID_DEVICE);
108  }
109  // TODO catch an exception and put it to list of asynchronous exceptions
110  getPlugin()->call<PiApiKind::piDeviceRetain>(MDevice);
111  return pi::cast<cl_device_id>(getNative());
112 }
113 
115  return createSyclObjFromImpl<platform>(MPlatform);
116 }
117 
118 template <typename Param>
119 typename Param::return_type device_impl::get_info() const {
120  if (is_host()) {
121  return get_device_info_host<Param>();
122  }
123  return get_device_info<Param>(
124  MPlatform->getOrMakeDeviceImpl(MDevice, MPlatform));
125 }
// Explicitly instantiate all device info traits
//
// Core descriptors: both macros below expand to the same explicit
// instantiation of device_impl::get_info for info::device::Desc. The DescType
// and PiCode arguments are not used by the expansion.
// NOTE(review): presumably device_traits.def invokes these macros once per
// descriptor - confirm against the .def file.
#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \
  template ReturnT device_impl::get_info<info::device::Desc>() const;

#define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) \
  template ReturnT device_impl::get_info<info::device::Desc>() const;

#include <sycl/info/device_traits.def>
// Drop the helper macros so the next section can redefine
// __SYCL_PARAM_TRAITS_SPEC with a different parameter list.
#undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED
#undef __SYCL_PARAM_TRAITS_SPEC

// Extension descriptors: this variant takes an extra Namespace argument, names
// the descriptor as Namespace::info::DescType::Desc, and marks the explicit
// instantiation with __SYCL_EXPORT.
#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \
  template __SYCL_EXPORT ReturnT \
  device_impl::get_info<Namespace::info::DescType::Desc>() const;

#include <sycl/info/ext_codeplay_device_traits.def>
#include <sycl/info/ext_intel_device_traits.def>
#include <sycl/info/ext_oneapi_device_traits.def>
// Keep the helper macro from leaking into the rest of the translation unit.
#undef __SYCL_PARAM_TRAITS_SPEC
145 
146 template <>
147 typename info::platform::version::return_type
148 device_impl::get_backend_info<info::platform::version>() const {
149  if (getBackend() != backend::opencl) {
151  "the info::platform::version info descriptor can "
152  "only be queried with an OpenCL backend");
153  }
154  return get_platform().get_info<info::platform::version>();
155 }
156 
157 template <>
158 typename info::device::version::return_type
159 device_impl::get_backend_info<info::device::version>() const {
160  if (getBackend() != backend::opencl) {
162  "the info::device::version info descriptor can only "
163  "be queried with an OpenCL backend");
164  }
165  return get_info<info::device::version>();
166 }
167 
168 template <>
169 typename info::device::backend_version::return_type
170 device_impl::get_backend_info<info::device::backend_version>() const {
171  if (getBackend() != backend::ext_oneapi_level_zero) {
173  "the info::device::backend_version info descriptor "
174  "can only be queried with a Level Zero backend");
175  }
176  return "";
177  // Currently the Level Zero backend does not define the value of this
178  // information descriptor and implementations are encouraged to return the
179  // empty string as per specification.
180 }
181 
182 bool device_impl::has_extension(const std::string &ExtensionName) const {
183  if (MIsHostDevice)
184  // TODO: implement extension management for host device;
185  return false;
186  std::string AllExtensionNames =
188  return (AllExtensionNames.find(ExtensionName) != std::string::npos);
189 }
190 
192  auto SupportedProperties = get_info<info::device::partition_properties>();
193  return std::find(SupportedProperties.begin(), SupportedProperties.end(),
194  Prop) != SupportedProperties.end();
195 }
196 
197 std::vector<device>
198 device_impl::create_sub_devices(const cl_device_partition_property *Properties,
199  size_t SubDevicesCount) const {
200 
201  std::vector<sycl::detail::pi::PiDevice> SubDevices(SubDevicesCount);
202  pi_uint32 ReturnedSubDevices = 0;
203  const PluginPtr &Plugin = getPlugin();
204  Plugin->call<sycl::errc::invalid, PiApiKind::piDevicePartition>(
205  MDevice, Properties, SubDevicesCount, SubDevices.data(),
206  &ReturnedSubDevices);
207  if (ReturnedSubDevices != SubDevicesCount) {
208  throw sycl::exception(
210  "Could not partition to the specified number of sub-devices");
211  }
212  // TODO: Need to describe the subdevice model. Some sub_device management
213  // may be necessary. What happens if create_sub_devices is called multiple
214  // times with the same arguments?
215  //
216  std::vector<device> res;
217  std::for_each(SubDevices.begin(), SubDevices.end(),
218  [&res, this](const sycl::detail::pi::PiDevice &a_pi_device) {
219  device sycl_device = detail::createSyclObjFromImpl<device>(
220  MPlatform->getOrMakeDeviceImpl(a_pi_device, MPlatform));
221  res.push_back(sycl_device);
222  });
223  return res;
224 }
225 
226 std::vector<device> device_impl::create_sub_devices(size_t ComputeUnits) const {
227  assert(!MIsHostDevice && "Partitioning is not supported on host.");
228 
230  throw sycl::feature_not_supported(
231  "Device does not support "
232  "sycl::info::partition_property::partition_equally.",
233  PI_ERROR_INVALID_OPERATION);
234  }
235  // If count exceeds the total number of compute units in the device, an
236  // exception with the errc::invalid error code must be thrown.
237  auto MaxComputeUnits = get_info<info::device::max_compute_units>();
238  if (ComputeUnits > MaxComputeUnits)
240  "Total counts exceed max compute units");
241 
242  size_t SubDevicesCount = MaxComputeUnits / ComputeUnits;
243  const pi_device_partition_property Properties[3] = {
245  0};
246  return create_sub_devices(Properties, SubDevicesCount);
247 }
248 
249 std::vector<device>
250 device_impl::create_sub_devices(const std::vector<size_t> &Counts) const {
251  assert(!MIsHostDevice && "Partitioning is not supported on host.");
252 
254  throw sycl::feature_not_supported(
255  "Device does not support "
256  "sycl::info::partition_property::partition_by_counts.",
257  PI_ERROR_INVALID_OPERATION);
258  }
259  static const pi_device_partition_property P[] = {
261  std::vector<pi_device_partition_property> Properties(P, P + 3);
262 
263  // Fill the properties vector with counts and validate it
264  auto It = Properties.begin() + 1;
265  size_t TotalCounts = 0;
266  size_t NonZeroCounts = 0;
267  for (auto Count : Counts) {
268  TotalCounts += Count;
269  NonZeroCounts += (Count != 0) ? 1 : 0;
270  It = Properties.insert(It, Count);
271  }
272 
273  // If the number of non-zero values in counts exceeds the device's maximum
274  // number of sub devices (as returned by info::device::
275  // partition_max_sub_devices) an exception with the errc::invalid
276  // error code must be thrown.
277  if (NonZeroCounts > get_info<info::device::partition_max_sub_devices>())
279  "Total non-zero counts exceed max sub-devices");
280 
281  // If the total of all the values in the counts vector exceeds the total
282  // number of compute units in the device (as returned by
283  // info::device::max_compute_units), an exception with the errc::invalid
284  // error code must be thrown.
285  if (TotalCounts > get_info<info::device::max_compute_units>())
287  "Total counts exceed max compute units");
288 
289  return create_sub_devices(Properties.data(), Counts.size());
290 }
291 
293  info::partition_affinity_domain AffinityDomain) const {
294  assert(!MIsHostDevice && "Partitioning is not supported on host.");
295 
298  throw sycl::feature_not_supported(
299  "Device does not support "
300  "sycl::info::partition_property::partition_by_affinity_domain.",
301  PI_ERROR_INVALID_OPERATION);
302  }
303  if (!is_affinity_supported(AffinityDomain)) {
304  throw sycl::feature_not_supported(
305  "Device does not support " + affinityDomainToString(AffinityDomain) +
306  ".",
307  PI_ERROR_INVALID_VALUE);
308  }
309  const pi_device_partition_property Properties[3] = {
311  (pi_device_partition_property)AffinityDomain, 0};
312 
313  pi_uint32 SubDevicesCount = 0;
314  const PluginPtr &Plugin = getPlugin();
315  Plugin->call<sycl::errc::invalid, PiApiKind::piDevicePartition>(
316  MDevice, Properties, 0, nullptr, &SubDevicesCount);
317 
318  return create_sub_devices(Properties, SubDevicesCount);
319 }
320 
321 std::vector<device> device_impl::create_sub_devices() const {
322  assert(!MIsHostDevice && "Partitioning is not supported on host.");
323 
326  throw sycl::feature_not_supported(
327  "Device does not support "
328  "sycl::info::partition_property::ext_intel_partition_by_cslice.",
329  PI_ERROR_INVALID_OPERATION);
330  }
331 
332  const pi_device_partition_property Properties[2] = {
334 
335  pi_uint32 SubDevicesCount = 0;
336  const PluginPtr &Plugin = getPlugin();
337  Plugin->call<sycl::errc::invalid, PiApiKind::piDevicePartition>(
338  MDevice, Properties, 0, nullptr, &SubDevicesCount);
339 
340  return create_sub_devices(Properties, SubDevicesCount);
341 }
342 
344  auto Plugin = getPlugin();
345  if (getBackend() == backend::opencl)
346  Plugin->call<PiApiKind::piDeviceRetain>(getHandleRef());
347  pi_native_handle Handle;
348  Plugin->call<PiApiKind::piextDeviceGetNativeHandle>(getHandleRef(), &Handle);
349  return Handle;
350 }
351 
352 bool device_impl::has(aspect Aspect) const {
353  size_t return_size = 0;
354 
355  switch (Aspect) {
356  case aspect::host:
357  return is_host();
358  case aspect::cpu:
359  return is_cpu();
360  case aspect::gpu:
361  return is_gpu();
362  case aspect::accelerator:
363  return is_accelerator();
364  case aspect::custom:
365  return false;
366  // TODO: Implement this for FPGA and ESIMD emulators.
367  case aspect::emulated:
368  return false;
369  case aspect::host_debuggable:
370  return false;
371  case aspect::fp16:
372  return has_extension("cl_khr_fp16");
373  case aspect::fp64:
374  return has_extension("cl_khr_fp64");
375  case aspect::ext_oneapi_bfloat16_math_functions:
376  return get_info<info::device::ext_oneapi_bfloat16_math_functions>();
377  case aspect::int64_base_atomics:
378  return has_extension("cl_khr_int64_base_atomics");
379  case aspect::int64_extended_atomics:
380  return has_extension("cl_khr_int64_extended_atomics");
381  case aspect::atomic64:
382  return get_info<info::device::atomic64>();
383  case aspect::image:
384  return get_info<info::device::image_support>();
385  case aspect::online_compiler:
386  return get_info<info::device::is_compiler_available>();
387  case aspect::online_linker:
388  return get_info<info::device::is_linker_available>();
389  case aspect::queue_profiling:
390  return get_info<info::device::queue_profiling>();
391  case aspect::usm_device_allocations:
392  return get_info<info::device::usm_device_allocations>();
393  case aspect::usm_host_allocations:
394  return get_info<info::device::usm_host_allocations>();
395  case aspect::ext_intel_mem_channel:
396  return get_info<info::device::ext_intel_mem_channel>();
397  case aspect::usm_atomic_host_allocations:
398  return is_host() ||
400  info::device::usm_host_allocations>::
401  get(MPlatform->getDeviceImpl(MDevice)) &
403  case aspect::usm_shared_allocations:
404  return get_info<info::device::usm_shared_allocations>();
405  case aspect::usm_atomic_shared_allocations:
406  return is_host() ||
408  info::device::usm_shared_allocations>::
409  get(MPlatform->getDeviceImpl(MDevice)) &
411  case aspect::usm_restricted_shared_allocations:
412  return get_info<info::device::usm_restricted_shared_allocations>();
413  case aspect::usm_system_allocations:
414  return get_info<info::device::usm_system_allocations>();
415  case aspect::ext_intel_device_id:
416  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
417  MDevice, PI_DEVICE_INFO_DEVICE_ID, 0, nullptr, &return_size) ==
418  PI_SUCCESS;
419  case aspect::ext_intel_pci_address:
420  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
421  MDevice, PI_DEVICE_INFO_PCI_ADDRESS, 0, nullptr, &return_size) ==
422  PI_SUCCESS;
423  case aspect::ext_intel_gpu_eu_count:
424  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
425  MDevice, PI_DEVICE_INFO_GPU_EU_COUNT, 0, nullptr,
426  &return_size) == PI_SUCCESS;
427  case aspect::ext_intel_gpu_eu_simd_width:
428  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
429  MDevice, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, 0, nullptr,
430  &return_size) == PI_SUCCESS;
431  case aspect::ext_intel_gpu_slices:
432  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
433  MDevice, PI_DEVICE_INFO_GPU_SLICES, 0, nullptr, &return_size) ==
434  PI_SUCCESS;
435  case aspect::ext_intel_gpu_subslices_per_slice:
436  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
437  MDevice, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, 0, nullptr,
438  &return_size) == PI_SUCCESS;
439  case aspect::ext_intel_gpu_eu_count_per_subslice:
440  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
441  MDevice, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, 0, nullptr,
442  &return_size) == PI_SUCCESS;
443  case aspect::ext_intel_gpu_hw_threads_per_eu:
444  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
445  MDevice, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, 0, nullptr,
446  &return_size) == PI_SUCCESS;
447  case aspect::ext_intel_free_memory:
448  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
449  MDevice, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, 0, nullptr,
450  &return_size) == PI_SUCCESS;
451  case aspect::ext_intel_memory_clock_rate:
452  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
453  MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, 0, nullptr,
454  &return_size) == PI_SUCCESS;
455  case aspect::ext_intel_memory_bus_width:
456  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
457  MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, 0, nullptr,
458  &return_size) == PI_SUCCESS;
459  case aspect::ext_intel_device_info_uuid: {
460  auto Result = getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
461  MDevice, PI_DEVICE_INFO_UUID, 0, nullptr, &return_size);
462  if (Result != PI_SUCCESS) {
463  return false;
464  }
465 
466  assert(return_size <= 16);
467  unsigned char UUID[16];
468 
469  return getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
470  MDevice, PI_DEVICE_INFO_UUID, 16 * sizeof(unsigned char), UUID,
471  nullptr) == PI_SUCCESS;
472  }
473  case aspect::ext_intel_max_mem_bandwidth:
474  // currently not supported
475  return false;
476  case aspect::ext_oneapi_srgb:
477  return get_info<info::device::ext_oneapi_srgb>();
478  case aspect::ext_oneapi_native_assert:
479  return isAssertFailSupported();
480  case aspect::ext_oneapi_cuda_async_barrier: {
481  int async_barrier_supported;
482  bool call_successful =
484  MDevice, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, sizeof(int),
485  &async_barrier_supported, nullptr) == PI_SUCCESS;
486  return call_successful && async_barrier_supported;
487  }
488  case aspect::ext_intel_legacy_image: {
489  pi_bool legacy_image_support = PI_FALSE;
490  bool call_successful =
492  MDevice, PI_DEVICE_INFO_IMAGE_SUPPORT, sizeof(pi_bool),
493  &legacy_image_support, nullptr) == PI_SUCCESS;
494  return call_successful && legacy_image_support;
495  }
496  case aspect::ext_oneapi_bindless_images: {
497  pi_bool support = PI_FALSE;
498  bool call_successful =
501  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
502  return call_successful && support;
503  }
504  case aspect::ext_oneapi_bindless_images_shared_usm: {
505  pi_bool support = PI_FALSE;
506  bool call_successful =
508  MDevice,
510  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
511  return call_successful && support;
512  }
513  case aspect::ext_oneapi_bindless_images_1d_usm: {
514  pi_bool support = PI_FALSE;
515  bool call_successful =
518  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
519  return call_successful && support;
520  }
521  case aspect::ext_oneapi_bindless_images_2d_usm: {
522  pi_bool support = PI_FALSE;
523  bool call_successful =
526  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
527  return call_successful && support;
528  }
529  case aspect::ext_oneapi_interop_memory_import: {
530  pi_bool support = PI_FALSE;
531  bool call_successful =
534  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
535  return call_successful && support;
536  }
537  case aspect::ext_oneapi_interop_memory_export: {
538  pi_bool support = PI_FALSE;
539  bool call_successful =
542  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
543  return call_successful && support;
544  }
545  case aspect::ext_oneapi_interop_semaphore_import: {
546  pi_bool support = PI_FALSE;
547  bool call_successful =
550  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
551  return call_successful && support;
552  }
553  case aspect::ext_oneapi_interop_semaphore_export: {
554  pi_bool support = PI_FALSE;
555  bool call_successful =
558  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
559  return call_successful && support;
560  }
561  case aspect::ext_oneapi_mipmap: {
562  pi_bool support = PI_FALSE;
563  bool call_successful =
566  &support, nullptr) == PI_SUCCESS;
567  return call_successful && support;
568  }
569  case aspect::ext_oneapi_mipmap_anisotropy: {
570  pi_bool support = PI_FALSE;
571  bool call_successful =
574  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
575  return call_successful && support;
576  }
577  case aspect::ext_oneapi_mipmap_level_reference: {
578  pi_bool support = PI_FALSE;
579  bool call_successful =
582  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
583  return call_successful && support;
584  }
585  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
586  pi_bool support = PI_FALSE;
587  bool call_successful =
589  MDevice,
591  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
592  return call_successful && support;
593  }
594  case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
595  pi_bool support = PI_FALSE;
596  bool call_successful =
599  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
600  return call_successful && support;
601  }
602  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
603  pi_bool support = PI_FALSE;
604  bool call_successful =
606  MDevice,
608  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
609  return call_successful && support;
610  }
611  case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
612  pi_bool support = PI_FALSE;
613  bool call_successful =
616  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
617  return call_successful && support;
618  }
619  case aspect::ext_oneapi_bindless_sampled_image_fetch_3d_usm: {
620  pi_bool support = PI_FALSE;
621  bool call_successful =
623  MDevice,
625  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
626  return call_successful && support;
627  }
628  case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
629  pi_bool support = PI_FALSE;
630  bool call_successful =
633  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
634  return call_successful && support;
635  }
636  case aspect::ext_oneapi_cubemap: {
637  pi_bool support = PI_FALSE;
638  bool call_successful =
641  &support, nullptr) == PI_SUCCESS;
642  return call_successful && support;
643  }
644  case aspect::ext_oneapi_cubemap_seamless_filtering: {
645  pi_bool support = PI_FALSE;
646  bool call_successful =
648  MDevice,
650  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
651  return call_successful && support;
652  }
653  case aspect::ext_intel_esimd: {
654  pi_bool support = PI_FALSE;
655  bool call_successful =
658  &support, nullptr) == PI_SUCCESS;
659  return call_successful && support;
660  }
661  case aspect::ext_oneapi_ballot_group:
662  case aspect::ext_oneapi_fixed_size_group:
663  case aspect::ext_oneapi_opportunistic_group: {
664  return (this->getBackend() == backend::ext_oneapi_level_zero) ||
665  (this->getBackend() == backend::opencl) ||
667  }
668  case aspect::ext_oneapi_tangle_group: {
669  // TODO: tangle_group is not currently supported for CUDA devices. Add when
670  // implemented.
671  return (this->getBackend() == backend::ext_oneapi_level_zero) ||
672  (this->getBackend() == backend::opencl);
673  }
674  case aspect::ext_intel_matrix: {
676  const std::vector<arch> supported_archs = {
677  arch::intel_cpu_spr, arch::intel_gpu_pvc, arch::intel_gpu_dg2_g10,
678  arch::intel_gpu_dg2_g11, arch::intel_gpu_dg2_g12};
679  try {
680  return std::any_of(
681  supported_archs.begin(), supported_archs.end(),
682  [=](const arch a) { return this->extOneapiArchitectureIs(a); });
683  } catch (const sycl::exception &) {
684  // If we're here it means the device does not support architecture
685  // querying
686  return false;
687  }
688  }
689  case aspect::ext_oneapi_is_composite: {
690  auto components = get_info<
691  sycl::ext::oneapi::experimental::info::device::component_devices>();
692  // Any device with ext_oneapi_is_composite aspect will have at least two
693  // constituent component devices.
694  return components.size() >= 2;
695  }
696  case aspect::ext_oneapi_is_component: {
698  return false;
699 
700  typename sycl_to_pi<device>::type Result = nullptr;
701  bool CallSuccessful = getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
702  getHandleRef(),
703  PiInfoCode<ext::oneapi::experimental::info::
704  device::composite_device>::value,
705  sizeof(Result), &Result, nullptr) == PI_SUCCESS;
706 
707  return CallSuccessful && Result != nullptr;
708  }
709  case aspect::ext_oneapi_graph: {
710  pi_bool SupportsCommandBufferUpdate = false;
711  bool CallSuccessful =
712  getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
714  sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
715  nullptr) == PI_SUCCESS;
716  if (!CallSuccessful) {
717  return PI_FALSE;
718  }
719 
720  return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
721  }
722  case aspect::ext_oneapi_limited_graph: {
723  pi_bool SupportsCommandBuffers = false;
724  bool CallSuccessful =
725  getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
727  sizeof(SupportsCommandBuffers), &SupportsCommandBuffers,
728  nullptr) == PI_SUCCESS;
729  if (!CallSuccessful) {
730  return PI_FALSE;
731  }
732 
733  return SupportsCommandBuffers;
734  }
735  case aspect::ext_intel_fpga_task_sequence: {
736  return is_accelerator();
737  }
738  case aspect::ext_oneapi_private_alloca: {
739  // Extension only supported on SPIR-V targets.
740  backend be = getBackend();
741  return be == sycl::backend::ext_oneapi_level_zero ||
742  be == sycl::backend::opencl;
743  }
744  case aspect::ext_oneapi_queue_profiling_tag: {
745  pi_bool support = PI_FALSE;
746  bool call_successful =
749  sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
750  return call_successful && support;
751  }
752  }
753  throw runtime_error("This device aspect has not been implemented yet.",
754  PI_ERROR_INVALID_DEVICE);
755 }
756 
757 std::shared_ptr<device_impl> device_impl::getHostDeviceImpl() {
758  static std::shared_ptr<device_impl> HostImpl =
759  std::make_shared<device_impl>();
760 
761  return HostImpl;
762 }
763 
765  return MIsAssertFailSupported;
766 }
767 
768 std::string device_impl::getDeviceName() const {
769  std::call_once(MDeviceNameFlag,
770  [this]() { MDeviceName = get_info<info::device::name>(); });
771 
772  return MDeviceName;
773 }
774 
776  std::call_once(MDeviceArchFlag, [this]() {
777  MDeviceArch =
778  get_info<ext::oneapi::experimental::info::device::architecture>();
779  });
780 
781  return MDeviceArch;
782 }
783 
784 // On the first call this function queries for device timestamp
785 // along with host synchronized timestamp and stores it in member variable
786 // MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
787 // the host timestamp, compute difference against the host timestamp in
788 // MDeviceHostBaseTime and calculate the device timestamp based on the
789 // difference.
790 //
791 // The MDeviceHostBaseTime is refreshed with new device and host timestamp
792 // after a certain interval (determined by TimeTillRefresh) to account for
793 // clock drift between host and device.
794 //
796  using namespace std::chrono;
797  uint64_t HostTime =
798  duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
799  .count();
800  if (MIsHostDevice) {
801  return HostTime;
802  }
803 
804  // To account for potential clock drift between host clock and device clock.
805  // The value set is arbitrary: 200 seconds
806  constexpr uint64_t TimeTillRefresh = 200e9;
807  assert(HostTime >= MDeviceHostBaseTime.second);
808  uint64_t Diff = HostTime - MDeviceHostBaseTime.second;
809 
810  // If getCurrentDeviceTime is called for the first time or we have to refresh.
811  if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) {
812  const auto &Plugin = getPlugin();
813  auto Result =
814  Plugin->call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(
815  MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second);
816  // We have to remember base host timestamp right after PI call and it is
817  // going to be used for calculation of the device timestamp at the next
818  // getCurrentDeviceTime() call. We need to do it here because getPlugin()
819  // and piGetDeviceAndHostTimer calls may take significant amount of time,
820  // for example on the first call to getPlugin plugins may need to be
821  // initialized. If we use timestamp from the beginning of the function then
822  // the difference between host timestamps of the current
823  // getCurrentDeviceTime and the next getCurrentDeviceTime will be incorrect
824  // because it will include execution time of the code before we get device
825  // timestamp from piGetDeviceAndHostTimer.
826  HostTime =
827  duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
828  .count();
829  if (Result == PI_ERROR_INVALID_OPERATION) {
830  char *p = nullptr;
831  Plugin->call_nocheck<detail::PiApiKind::piPluginGetLastError>(&p);
832  std::string errorMsg(p ? p : "");
833  throw sycl::feature_not_supported(
834  "Device and/or backend does not support querying timestamp: " +
835  errorMsg,
836  Result);
837  } else {
838  Plugin->checkPiResult(Result);
839  }
840  // Until next sync we will compute device time based on the host time
841  // returned in HostTime, so make this our base host time.
842  MDeviceHostBaseTime.second = HostTime;
843  Diff = 0;
844  }
845  return MDeviceHostBaseTime.first + Diff;
846 }
847 
849  const auto &Plugin = getPlugin();
850  uint64_t DeviceTime = 0, HostTime = 0;
851  auto Result =
852  Plugin->call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(
853  MDevice, &DeviceTime, &HostTime);
854  return Result != PI_ERROR_INVALID_OPERATION;
855 }
856 
859  try {
860  return is_source_kernel_bundle_supported(getBackend(), Language);
861  } catch (sycl::exception &) {
862  return false;
863  }
864 }
865 
866 // Returns the strongest guarantee that can be provided by the host device for
867 // threads created at threadScope from a coordination scope given by
868 // coordinationScope
873  return sycl::ext::oneapi::experimental::forward_progress_guarantee::
874  weakly_parallel;
875 }
876 
877 // Returns the strongest progress guarantee that can be provided by this device
878 // for threads created at threadScope from the coordination scope given by
879 // coordinationScope.
883  ext::oneapi::experimental::execution_scope coordinationScope) const {
887  const int executionScopeSize = 4;
888  (void)coordinationScope;
889  int threadScopeNum = static_cast<int>(threadScope);
890  // we get the immediate progress guarantee that is provided by each scope
891  // between root_group and threadScope and then return the weakest of these.
892  // Counterintuitively, this corresponds to taking the max of the enum values
893  // because of how the forward_progress_guarantee enum values are declared.
894  int guaranteeNum = static_cast<int>(
895  getImmediateProgressGuarantee(execution_scope::root_group));
896  for (int currentScope = executionScopeSize - 2; currentScope > threadScopeNum;
897  --currentScope) {
898  guaranteeNum = std::max(guaranteeNum,
899  static_cast<int>(getImmediateProgressGuarantee(
900  static_cast<execution_scope>(currentScope))));
901  }
902  return static_cast<forward_progress_guarantee>(guaranteeNum);
903 }
904 
908  ext::oneapi::experimental::execution_scope coordinationScope) const {
909  using ReturnT =
910  std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
911  auto guarantees = getProgressGuaranteesUpTo<ReturnT>(
912  getProgressGuarantee(threadScope, coordinationScope));
913  return std::find(guarantees.begin(), guarantees.end(), guarantee) !=
914  guarantees.end();
915 }
916 
917 // Returns the progress guarantee provided for a coordination scope
918 // given by coordination_scope for threads created at a scope
919 // immediately below coordination_scope. For example, for root_group
920 // coordination scope it returns the progress guarantee provided
921 // at root_group for threads created at work_group.
924  ext::oneapi::experimental::execution_scope coordination_scope) const {
928  if (is_cpu() && getBackend() == backend::opencl) {
929  switch (coordination_scope) {
930  case execution_scope::root_group:
931  return forward_progress_guarantee::parallel;
932  case execution_scope::work_group:
933  case execution_scope::sub_group:
934  return forward_progress_guarantee::weakly_parallel;
935  default:
936  throw sycl::exception(sycl::errc::invalid,
937  "Work item is not a valid coordination scope!");
938  }
939  } else if (is_gpu() && getBackend() == backend::ext_oneapi_level_zero) {
940  switch (coordination_scope) {
941  case execution_scope::root_group:
942  case execution_scope::work_group:
943  return forward_progress_guarantee::concurrent;
944  case execution_scope::sub_group:
945  return forward_progress_guarantee::weakly_parallel;
946  default:
947  throw sycl::exception(sycl::errc::invalid,
948  "Work item is not a valid coordination scope!");
949  }
950  }
951  return forward_progress_guarantee::weakly_parallel;
952 }
953 
954 } // namespace detail
955 } // namespace _V1
956 } // namespace sycl
std::vector< device > create_sub_devices() const
Partition device into sub devices.
bool has(aspect Aspect) const
Indicates if the SYCL device has the given feature.
bool is_host() const
Check if SYCL device is a host device.
Definition: device_impl.hpp:91
Param::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
platform get_platform() const
Get associated SYCL platform.
bool isGetDeviceAndHostTimerSupported()
Check clGetDeviceAndHostTimer is available for fallback profiling.
std::string get_device_info_string(sycl::detail::pi::PiDeviceInfo InfoCode) const
Get device info string.
device_impl()
Constructs a SYCL device instance as a host device.
Definition: device_impl.cpp:20
bool is_cpu() const
Check if device is a CPU device.
Definition: device_impl.hpp:96
backend getBackend() const
Get the backend of this device.
const PluginPtr & getPlugin() const
pi_native_handle getNative() const
Gets the native handle of the SYCL device.
bool is_gpu() const
Check if device is a GPU device.
uint64_t getCurrentDeviceTime()
Gets the current device timestamp.
std::string getDeviceName() const
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
Definition: device_impl.hpp:67
bool has_extension(const std::string &ExtensionName) const
Check SYCL extension support by device.
static sycl::ext::oneapi::experimental::forward_progress_guarantee getHostProgressGuarantee(sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope)
sycl::ext::oneapi::experimental::forward_progress_guarantee getProgressGuarantee(ext::oneapi::experimental::execution_scope threadScope, ext::oneapi::experimental::execution_scope coordinationScope) const
bool is_affinity_supported(info::partition_affinity_domain AffinityDomain) const
Check if affinity partitioning by specified domain is supported by device.
Definition: device_impl.cpp:96
cl_device_id get() const
Get instance of OpenCL device.
ext::oneapi::experimental::architecture getDeviceArch() const
Get device architecture.
bool supportsForwardProgress(ext::oneapi::experimental::forward_progress_guarantee guarantee, ext::oneapi::experimental::execution_scope threadScope, ext::oneapi::experimental::execution_scope coordinationScope) const
ext::oneapi::experimental::forward_progress_guarantee getImmediateProgressGuarantee(ext::oneapi::experimental::execution_scope coordination_scope) const
bool extOneapiCanCompile(ext::oneapi::experimental::source_language Language)
bool is_partition_supported(info::partition_property Prop) const
Check if desired partition property supported by device.
static std::shared_ptr< device_impl > getHostDeviceImpl()
Gets the single instance of the Host Device.
bool is_accelerator() const
Check if device is an accelerator device.
static std::shared_ptr< platform_impl > getPlatformFromPiDevice(sycl::detail::pi::PiDevice PiDevice, const PluginPtr &Plugin)
Queries the cache for the specified platform based on an input device.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
Encapsulates a SYCL platform on which kernels may be executed.
Definition: platform.hpp:99
::pi_device PiDevice
Definition: pi.hpp:114
::pi_device_type PiDeviceType
Definition: pi.hpp:115
std::string affinityDomainToString(info::partition_affinity_domain AffinityDomain)
Definition: device_info.hpp:87
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:47
std::shared_ptr< detail::platform_impl > PlatformImplPtr
Function for_each(Group g, Ptr first, Ptr last, Function f)
bool is_source_kernel_bundle_supported(backend BE, source_language Language)
Definition: access.hpp:18
#define PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT
Extension to denote native support of assert feature by an arbitrary device piDeviceGetInfo call shou...
Definition: pi.h:1023
uintptr_t pi_native_handle
Definition: pi.h:243
pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle)
Gets the native handle of a PI device object.
Definition: pi_cuda.cpp:100
_pi_result
Definition: pi.h:250
pi_uint32 pi_bool
Definition: pi.h:241
static constexpr pi_device_partition_property PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE
Definition: pi.h:905
_pi_usm_capabilities pi_usm_capabilities
Definition: pi.h:2080
@ PI_DEVICE_INFO_GPU_SLICES
Definition: pi.h:423
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D
Definition: pi.h:499
@ PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT
Definition: pi.h:478
@ PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT
Definition: pi.h:494
@ PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT
Definition: pi.h:491
@ PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT
Definition: pi.h:458
@ PI_DEVICE_INFO_GPU_EU_COUNT
Definition: pi.h:421
@ PI_DEVICE_INFO_IMAGE_SUPPORT
Definition: pi.h:358
@ PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY
Definition: pi.h:430
@ PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT
Definition: pi.h:479
@ PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT
Definition: pi.h:506
@ PI_DEVICE_INFO_UUID
Definition: pi.h:416
@ PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT
Definition: pi.h:474
@ PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT
Definition: pi.h:475
@ PI_DEVICE_INFO_PARENT_DEVICE
Definition: pi.h:402
@ PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH
Definition: pi.h:422
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT
Definition: pi.h:466
@ PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU
Definition: pi.h:443
@ PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT
Definition: pi.h:495
@ PI_DEVICE_INFO_DEVICE_ID
Definition: pi.h:419
@ PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT
Definition: pi.h:481
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
Definition: pi.h:502
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM
Definition: pi.h:498
@ PI_DEVICE_INFO_PCI_ADDRESS
Definition: pi.h:420
@ PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER
Definition: pi.h:451
@ PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
Definition: pi.h:425
@ PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT
Definition: pi.h:477
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
Definition: pi.h:501
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM
Definition: pi.h:500
@ PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE
Definition: pi.h:433
@ PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT
Definition: pi.h:480
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT
Definition: pi.h:467
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT
Definition: pi.h:468
@ PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH
Definition: pi.h:436
@ PI_DEVICE_INFO_TYPE
Definition: pi.h:331
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT
Definition: pi.h:469
@ PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT
Definition: pi.h:490
@ PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
Definition: pi.h:424
@ PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
Definition: pi.h:503
static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_COUNTS
Definition: pi.h:898
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:78
@ PI_USM_CONCURRENT_ATOMIC_ACCESS
Definition: pi.h:2055
static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_COUNTS_LIST_END
Definition: pi.h:901
static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY
Definition: pi.h:896
const pi_bool PI_FALSE
Definition: pi.h:727
uint32_t pi_uint32
Definition: pi.h:239
static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
Definition: pi.h:903
pi_result piPluginGetLastError(char **message)
API to get Plugin specific warning and error messages.
Definition: pi_cuda.cpp:52
pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform platform, pi_device *device)
Creates PI device object from a native handle.
Definition: pi_cuda.cpp:106
pi_result piDeviceRetain(pi_device device)
Definition: pi_cuda.cpp:70
pi_result piDeviceRelease(pi_device device)
Definition: pi_cuda.cpp:74
pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime)
Queries device for its global timestamp in nanoseconds, and updates HostTime with the value of the h...
Definition: pi_cuda.cpp:1275
intptr_t pi_device_partition_property
Definition: pi.h:895
pi_result piDevicePartition(pi_device device, const pi_device_partition_property *properties, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices)
Definition: pi_cuda.cpp:85
#define __SYCL_CHECK_OCL_CODE_NO_EXC(X)
Definition: plugin.hpp:59
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept