DPC++ Runtime
Runtime libraries for oneAPI DPC++
device.hpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Copyright (C) Codeplay Software Ltd.
4  *
5  * Part of the LLVM Project, under the Apache License v2.0 with LLVM
6  * Exceptions. See https://llvm.org/LICENSE.txt for license information.
7  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  *
15  * SYCL compatibility extension
16  *
17  * device.hpp
18  *
19  * Description:
20  * Device functionality for the SYCL compatibility extension
21  **************************************************************************/
22 
23 // The original source was under the license below:
24 //==---- device.hpp -------------------------------*- C++ -*----------------==//
25 //
26 // Copyright (C) Intel Corporation
27 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
28 // See https://llvm.org/LICENSE.txt for license information.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #pragma once
33 
34 #include <algorithm>
35 #include <cstring>
36 #include <iostream>
37 #include <map>
38 #include <mutex>
39 #include <set>
40 #include <sstream>
41 #include <thread>
42 #include <vector>
43 #if defined(__linux__)
44 #include <sys/syscall.h>
45 #include <unistd.h>
46 #endif
47 #if defined(_WIN64)
48 #ifndef NOMINMAX
49 #define NOMINMAX
50 #endif
51 #include <windows.h>
52 #endif
53 
55 #include <sycl/exception_list.hpp>
57 #include <sycl/queue.hpp>
58 
59 namespace syclcompat {
60 
61 namespace detail {
62 
64 inline auto exception_handler = [](sycl::exception_list exceptions) {
65  for (std::exception_ptr const &e : exceptions) {
66  try {
67  std::rethrow_exception(e);
68  } catch (sycl::exception const &e) {
69  std::cerr << "[SYCLcompat] Caught asynchronous SYCL exception:"
70  << std::endl
71  << e.what() << std::endl
72  << "Exception caught at file:" << __FILE__
73  << ", line:" << __LINE__ << std::endl;
74  }
75  }
76 };
77 
78 } // namespace detail
79 
81 
83 
84 using device_ptr = char *;
85 
89 static void destroy_event(event_ptr event) { delete event; }
90 
91 class device_info {
92 public:
93  // get interface
94  const char *get_name() const { return _name; }
95  char *get_name() { return _name; }
96  template <typename WorkItemSizesTy = sycl::range<3>,
97  std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
98  std::is_same_v<WorkItemSizesTy, int *>,
99  int> = 0>
100  auto get_max_work_item_sizes() const {
101  if constexpr (std::is_same_v<WorkItemSizesTy, sycl::range<3>>)
102  return _max_work_item_sizes;
103  else
104  return _max_work_item_sizes_i;
105  }
106  template <typename WorkItemSizesTy = sycl::range<3>,
107  std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
108  std::is_same_v<WorkItemSizesTy, int *>,
109  int> = 0>
111  if constexpr (std::is_same_v<WorkItemSizesTy, sycl::range<3>>)
112  return _max_work_item_sizes;
113  else
114  return _max_work_item_sizes_i;
115  }
116  bool get_host_unified_memory() const { return _host_unified_memory; }
117  int get_major_version() const { return _major; }
118  int get_minor_version() const { return _minor; }
119  int get_integrated() const { return _integrated; }
120  int get_max_clock_frequency() const { return _frequency; }
121  int get_max_compute_units() const { return _max_compute_units; }
122  int get_max_work_group_size() const { return _max_work_group_size; }
123  int get_max_sub_group_size() const { return _max_sub_group_size; }
125  return _max_work_items_per_compute_unit;
126  }
128  return _max_register_size_per_work_group;
129  }
130  template <typename NDRangeSizeTy = size_t *,
131  std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
132  std::is_same_v<NDRangeSizeTy, int *>,
133  int> = 0>
134  auto get_max_nd_range_size() const {
135  if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
136  return _max_nd_range_size;
137  else
138  return _max_nd_range_size_i;
139  }
140  template <typename NDRangeSizeTy = size_t *,
141  std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
142  std::is_same_v<NDRangeSizeTy, int *>,
143  int> = 0>
145  if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
146  return _max_nd_range_size;
147  else
148  return _max_nd_range_size_i;
149  }
150  size_t get_global_mem_size() const { return _global_mem_size; }
151  size_t get_local_mem_size() const { return _local_mem_size; }
154  unsigned int get_memory_clock_rate() const { return _memory_clock_rate; }
157  unsigned int get_memory_bus_width() const { return _memory_bus_width; }
158  uint32_t get_device_id() const { return _device_id; }
159  std::array<unsigned char, 16> get_uuid() const { return _uuid; }
161  unsigned int get_global_mem_cache_size() const {
162  return _global_mem_cache_size;
163  }
164 
165  // set interface
166  void set_name(const char *name) {
167  size_t length = strlen(name);
168  if (length < device_info::NAME_BUFFER_SIZE) {
169  std::memcpy(_name, name, length + 1);
170  } else {
171  std::memcpy(_name, name, device_info::NAME_BUFFER_SIZE - 1);
172  _name[255] = '\0';
173  }
174  }
175  void set_max_work_item_sizes(const sycl::range<3> max_work_item_sizes) {
176  _max_work_item_sizes = max_work_item_sizes;
177  for (int i = 0; i < 3; ++i)
178  _max_work_item_sizes_i[i] = max_work_item_sizes[i];
179  }
180  [[deprecated]] void
181  set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes) {
182  for (int i = 0; i < 3; ++i) {
183  _max_work_item_sizes[i] = max_work_item_sizes[i];
184  _max_work_item_sizes_i[i] = max_work_item_sizes[i];
185  }
186  }
187  void set_host_unified_memory(bool host_unified_memory) {
188  _host_unified_memory = host_unified_memory;
189  }
190  void set_major_version(int major) { _major = major; }
191  void set_minor_version(int minor) { _minor = minor; }
192  void set_integrated(int integrated) { _integrated = integrated; }
193  void set_max_clock_frequency(int frequency) { _frequency = frequency; }
194  void set_max_compute_units(int max_compute_units) {
195  _max_compute_units = max_compute_units;
196  }
197  void set_global_mem_size(size_t global_mem_size) {
198  _global_mem_size = global_mem_size;
199  }
200  void set_local_mem_size(size_t local_mem_size) {
201  _local_mem_size = local_mem_size;
202  }
203  void set_max_work_group_size(int max_work_group_size) {
204  _max_work_group_size = max_work_group_size;
205  }
206  void set_max_sub_group_size(int max_sub_group_size) {
207  _max_sub_group_size = max_sub_group_size;
208  }
209  void
210  set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit) {
211  _max_work_items_per_compute_unit = max_work_items_per_compute_unit;
212  }
213  void set_max_nd_range_size(int max_nd_range_size[]) {
214  for (int i = 0; i < 3; i++) {
215  _max_nd_range_size[i] = max_nd_range_size[i];
216  _max_nd_range_size_i[i] = max_nd_range_size[i];
217  }
218  }
219  void set_memory_clock_rate(unsigned int memory_clock_rate) {
220  _memory_clock_rate = memory_clock_rate;
221  }
222  void set_memory_bus_width(unsigned int memory_bus_width) {
223  _memory_bus_width = memory_bus_width;
224  }
225  void
226  set_max_register_size_per_work_group(int max_register_size_per_work_group) {
227  _max_register_size_per_work_group = max_register_size_per_work_group;
228  }
229  void set_device_id(uint32_t device_id) { _device_id = device_id; }
230  void set_uuid(std::array<unsigned char, 16> uuid) { _uuid = std::move(uuid); }
231  void set_global_mem_cache_size(unsigned int global_mem_cache_size) {
232  _global_mem_cache_size = global_mem_cache_size;
233  }
234 
235 private:
236  constexpr static size_t NAME_BUFFER_SIZE = 256;
237 
238  char _name[device_info::NAME_BUFFER_SIZE];
239  sycl::range<3> _max_work_item_sizes;
240  int _max_work_item_sizes_i[3];
241  bool _host_unified_memory = false;
242  int _major;
243  int _minor;
244  int _integrated = 0;
245  int _frequency;
246  // Set estimated value 3200000 kHz as default value.
247  unsigned int _memory_clock_rate = 3200000;
248  // Set estimated value 64 bits as default value.
249  unsigned int _memory_bus_width = 64;
250  unsigned int _global_mem_cache_size;
251  int _max_compute_units;
252  int _max_work_group_size;
253  int _max_sub_group_size;
254  int _max_work_items_per_compute_unit;
255  int _max_register_size_per_work_group;
256  size_t _global_mem_size;
257  size_t _local_mem_size;
258  size_t _max_nd_range_size[3];
259  int _max_nd_range_size_i[3];
260  uint32_t _device_id;
261  std::array<unsigned char, 16> _uuid;
262 };
263 
265 class device_ext : public sycl::device {
266 public:
267  device_ext() : sycl::device(), _ctx(*this) {}
269  std::lock_guard<std::mutex> lock(m_mutex);
270  sycl::event::wait(_events);
271  _queues.clear();
272  }
273  device_ext(const sycl::device &base, bool print_on_async_exceptions = false,
274  bool in_order = true)
275  : sycl::device(base), _ctx(*this) {
276  if (!this->has(sycl::aspect::usm_device_allocations)) {
277  throw std::invalid_argument(
278  "Device does not support device USM allocations");
279  }
280  // calls create_queue since we don't have a locked m_mutex
281  _default_queue = create_queue(print_on_async_exceptions, in_order);
282  _saved_queue = _default_queue;
283  }
284 
285  bool is_native_host_atomic_supported() { return false; }
286  int get_major_version() const {
288  }
289 
290  int get_minor_version() const {
292  }
293 
294  int get_max_compute_units() const {
296  }
297 
301  }
302 
303  int get_integrated() const { return get_device_info().get_integrated(); }
304 
307  }
308 
311  }
312 
315  }
316 
318  return get_info<sycl::info::device::mem_base_addr_align>();
319  }
320 
321  size_t get_global_mem_size() const {
323  }
324 
330  void get_memory_info(size_t &free_memory, size_t &total_memory) const {
331 #if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
332  if (!has(sycl::aspect::ext_intel_free_memory)) {
333  std::cerr << "[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
334  "supported."
335  << std::endl;
336  free_memory = 0;
337  } else {
338  free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
339  }
340 #else
341  std::cerr << "[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
342  "supported."
343  << std::endl;
344  free_memory = 0;
345 #if defined(_MSC_VER) && !defined(__clang__)
346 #pragma message("Querying the number of bytes of free memory is not supported")
347 #else
348 #warning "Querying the number of bytes of free memory is not supported"
349 #endif
350 #endif
351  total_memory = get_device_info().get_global_mem_size();
352  }
353 
354  void get_device_info(device_info &out) const {
355  device_info prop;
356  prop.set_name(get_info<sycl::info::device::name>().c_str());
357 
358  int major, minor;
359  get_version(major, minor);
360  prop.set_major_version(major);
361  prop.set_minor_version(minor);
362 
364 #if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902)
365  // oneAPI DPC++ compiler older than 2022/09/02, where
366  // max_work_item_sizes is an enum class element
367  get_info<sycl::info::device::max_work_item_sizes>());
368 #else
369  // SYCL 2020-conformant code, max_work_item_sizes is a struct templated
370  // by an int
371  get_info<sycl::info::device::max_work_item_sizes<3>>());
372 #endif
373 
375  get_info<sycl::info::device::max_clock_frequency>());
377  get_info<sycl::info::device::max_compute_units>());
379  get_info<sycl::info::device::max_work_group_size>());
380  prop.set_global_mem_size(get_info<sycl::info::device::global_mem_size>());
381  prop.set_local_mem_size(get_info<sycl::info::device::local_mem_size>());
382 
383 #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6)
384  if (has(sycl::aspect::ext_intel_memory_clock_rate)) {
385  unsigned int tmp =
386  get_info<sycl::ext::intel::info::device::memory_clock_rate>();
387  if (tmp != 0)
388  prop.set_memory_clock_rate(1000 * tmp);
389  }
390  if (has(sycl::aspect::ext_intel_memory_bus_width)) {
392  get_info<sycl::ext::intel::info::device::memory_bus_width>());
393  }
394  if (has(sycl::aspect::ext_intel_device_id)) {
395  prop.set_device_id(get_info<sycl::ext::intel::info::device::device_id>());
396  }
397  if (has(sycl::aspect::ext_intel_device_info_uuid)) {
398  prop.set_uuid(get_info<sycl::ext::intel::info::device::uuid>());
399  }
400 #elif defined(_MSC_VER) && !defined(__clang__)
401 #pragma message("get_device_info: querying memory_clock_rate and \
402 memory_bus_width are not supported by the compiler used. \
403 Use 3200000 kHz as memory_clock_rate default value. \
404 Use 64 bits as memory_bus_width default value.")
405 #else
406 #warning "get_device_info: querying memory_clock_rate and \
407 memory_bus_width are not supported by the compiler used. \
408 Use 3200000 kHz as memory_clock_rate default value. \
409 Use 64 bits as memory_bus_width default value."
410 #endif
411 
412  size_t max_sub_group_size = 1;
413  std::vector<size_t> sub_group_sizes =
414  get_info<sycl::info::device::sub_group_sizes>();
415 
416  for (const auto &sub_group_size : sub_group_sizes) {
417  if (max_sub_group_size < sub_group_size)
418  max_sub_group_size = sub_group_size;
419  }
420 
421  prop.set_max_sub_group_size(max_sub_group_size);
422 
424  get_info<sycl::info::device::max_work_group_size>());
425  int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
426  prop.set_max_nd_range_size(max_nd_range_size);
427 
428  // Estimates max register size per work group, feel free to update the value
429  // according to device properties.
431 
433  get_info<sycl::info::device::global_mem_cache_size>());
434  out = prop;
435  }
436 
438  device_info prop;
439  get_device_info(prop);
440  return prop;
441  }
442 
443  void reset(bool print_on_async_exceptions = false, bool in_order = true) {
444  std::lock_guard<std::mutex> lock(m_mutex);
445  // The queues are shared_ptrs and the ref counts of the shared_ptrs increase
446  // only in wait_and_throw(). If there is no other thread calling
447  // wait_and_throw(), the queues will be destructed. The destructor waits for
448  // all commands executing on the queue to complete. It isn't possible to
449  // destroy a queue immediately. This is a synchronization point in SYCL.
450  _queues.clear();
451  // create new default queue
452  // calls create_queue_impl since we already have a locked m_mutex
453  _saved_queue = _default_queue =
454  create_queue_impl(print_on_async_exceptions, in_order);
455  }
456 
458  std::lock_guard<std::mutex> lock(m_mutex);
459  _queues.front().get()->wait_and_throw();
460  _queues[0] = std::make_shared<sycl::queue>(q);
461  if (_saved_queue == _default_queue)
462  _saved_queue = _queues.front().get();
463  _default_queue = _queues.front().get();
464  }
465 
466  queue_ptr default_queue() { return _default_queue; }
467 
469  std::unique_lock<std::mutex> lock(m_mutex);
470  std::vector<std::shared_ptr<sycl::queue>> current_queues(_queues);
471  lock.unlock();
472  for (const auto &q : current_queues) {
473  q->wait_and_throw();
474  }
475  // Guard the destruct of current_queues to make sure the ref count is safe.
476  lock.lock();
477  }
478  queue_ptr create_queue(bool print_on_async_exceptions = false,
479  bool in_order = true) {
480  std::lock_guard<std::mutex> lock(m_mutex);
481  return create_queue_impl(print_on_async_exceptions, in_order);
482  }
483  void destroy_queue(queue_ptr &queue) {
484  std::lock_guard<std::mutex> lock(m_mutex);
485  _queues.erase(
486  std::remove_if(_queues.begin(), _queues.end(),
487  [=](const std::shared_ptr<sycl::queue> &q) -> bool {
488  return q.get() == queue;
489  }),
490  _queues.end());
491  queue = nullptr;
492  }
494  std::lock_guard<std::mutex> lock(m_mutex);
495  _saved_queue = q;
496  }
498  std::lock_guard<std::mutex> lock(m_mutex);
499  return _saved_queue;
500  }
501  sycl::context get_context() const { return _ctx; }
502 
506  const std::initializer_list<sycl::aspect> &props) const {
507  for (const auto &it : props) {
508  if (has(it))
509  continue;
510  switch (it) {
511  case sycl::aspect::fp64:
512  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
513  "[SYCLcompat] 'double' is not supported in '" +
514  get_info<sycl::info::device::name>() +
515  "' device");
516  break;
517  case sycl::aspect::fp16:
518  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
519  "[SYCLcompat] 'half' is not supported in '" +
520  get_info<sycl::info::device::name>() +
521  "' device");
522  break;
523  default:
524 #define __SYCL_ASPECT(ASPECT, ID) \
525  case sycl::aspect::ASPECT: \
526  return #ASPECT;
527 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
528 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
529  auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
530  switch (AspectNum) {
531 #include <sycl/info/aspects.def>
532 #include <sycl/info/aspects_deprecated.def>
533  default:
534  return "unknown aspect";
535  }
536  };
537 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
538 #undef __SYCL_ASPECT_DEPRECATED
539 #undef __SYCL_ASPECT
540  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
541  "[SYCLcompat] '" + getAspectNameStr(it) +
542  "' is not supported in '" +
543  get_info<sycl::info::device::name>() +
544  "' device");
545  }
546  break;
547  }
548  }
549 
550 private:
553  queue_ptr create_queue_impl(bool print_on_async_exceptions = false,
554  bool in_order = true) {
555  sycl::property_list prop = {};
556  if (in_order) {
557  prop = {sycl::property::queue::in_order()};
558  }
559 #ifdef SYCLCOMPAT_PROFILING_ENABLED
560  prop.push_back(sycl::property::queue::enable_profiling());
561 #endif
562  if (print_on_async_exceptions) {
563  _queues.push_back(std::make_shared<sycl::queue>(
564  _ctx, *this, detail::exception_handler, prop));
565  } else {
566  _queues.push_back(std::make_shared<sycl::queue>(_ctx, *this, prop));
567  }
568  return _queues.back().get();
569  }
570 
571  void get_version(int &major, int &minor) const {
572  // Version string has the following format:
573  // a. OpenCL<space><major.minor><space><vendor-specific-information>
574  // b. <major.minor>
575  // c. <AmdGcnArchName> e.g gfx1030
576  std::string ver;
577  ver = get_info<sycl::info::device::version>();
578  std::string::size_type i = 0;
579  while (i < ver.size()) {
580  if (isdigit(ver[i]))
581  break;
582  i++;
583  }
584  major = std::stoi(&(ver[i]));
585  while (i < ver.size()) {
586  if (ver[i] == '.')
587  break;
588  i++;
589  }
590  if (i < ver.size()) {
591  // a. and b.
592  i++;
593  minor = std::stoi(&(ver[i]));
594  } else {
595  // c.
596  minor = 0;
597  }
598  }
599  void add_event(sycl::event event) {
600  std::lock_guard<std::mutex> lock(m_mutex);
601  _events.push_back(event);
602  }
603  friend sycl::event free_async(const std::vector<void *> &,
604  const std::vector<sycl::event> &, sycl::queue);
605  queue_ptr _default_queue;
606  queue_ptr _saved_queue;
607  sycl::context _ctx;
608  std::vector<std::shared_ptr<sycl::queue>> _queues;
609  mutable std::mutex m_mutex;
610  std::vector<sycl::event> _events;
611 };
612 
613 namespace detail {
614 
/// \return The calling thread's OS-level id: gettid on Linux,
/// GetCurrentThreadId on 64-bit Windows. Other platforms are rejected at
/// compile time.
static inline unsigned int get_tid() {
#if defined(__linux__)
  const auto tid = syscall(SYS_gettid);
  return tid;
#elif defined(_WIN64)
  const auto tid = GetCurrentThreadId();
  return tid;
#else
#error "Only support Windows and Linux."
#endif
}
624 
626 class dev_mgr {
627 public:
629  unsigned int dev_id = current_device_id();
630  check_id(dev_id);
631  return *_devs[dev_id];
632  }
634  std::lock_guard<std::mutex> lock(m_mutex);
635  if (_cpu_device == -1) {
636  throw std::runtime_error("[SYCLcompat] No valid cpu device");
637  } else {
638  return *_devs[_cpu_device];
639  }
640  }
641  device_ext &get_device(unsigned int id) const {
642  std::lock_guard<std::mutex> lock(m_mutex);
643  check_id(id);
644  return *_devs[id];
645  }
646  unsigned int current_device_id() const {
647  std::lock_guard<std::mutex> lock(m_mutex);
648  auto it = _thread2dev_map.find(get_tid());
649  if (it != _thread2dev_map.end())
650  return it->second;
651  return _default_device_id;
652  }
653 
657  void select_device(unsigned int id) {
658  std::lock_guard<std::mutex> lock(m_mutex);
659  check_id(id);
660  _thread2dev_map[get_tid()] = id;
661  }
662  unsigned int device_count() { return _devs.size(); }
663 
664  unsigned int get_device_id(const sycl::device &dev) {
665  unsigned int id = 0;
666  for (auto dev_item : _devs) {
667  if (*dev_item == dev) {
668  break;
669  }
670  id++;
671  }
672  return id;
673  }
674 
678  template <class DeviceSelector>
679  std::enable_if_t<
680  std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
681  select_device(const DeviceSelector &selector = sycl::gpu_selector_v) {
682  sycl::device selected_device = sycl::device(selector);
683  unsigned int selected_device_id = get_device_id(selected_device);
684  select_device(selected_device_id);
685  }
686 
688  static dev_mgr &instance() {
689  static dev_mgr d_m;
690  return d_m;
691  }
692  dev_mgr(const dev_mgr &) = delete;
693  dev_mgr &operator=(const dev_mgr &) = delete;
694  dev_mgr(dev_mgr &&) = delete;
695  dev_mgr &operator=(dev_mgr &&) = delete;
696 
697 private:
698  mutable std::mutex m_mutex;
699 
700  dev_mgr() {
702  _devs.push_back(std::make_shared<device_ext>(default_device));
703 
704  std::vector<sycl::device> sycl_all_devs =
705  sycl::device::get_devices(sycl::info::device_type::all);
706  // Collect other devices except for the default device.
707  if (default_device.is_cpu())
708  _cpu_device = 0;
709  for (auto &dev : sycl_all_devs) {
710  if (dev == default_device) {
711  continue;
712  }
713  _devs.push_back(std::make_shared<device_ext>(dev));
714  if (_cpu_device == -1 && dev.is_cpu()) {
715  _cpu_device = _devs.size() - 1;
716  }
717  }
718  }
719  void check_id(unsigned int id) const {
720  if (id >= _devs.size()) {
721  throw std::runtime_error("invalid device id");
722  }
723  }
724  std::vector<std::shared_ptr<device_ext>> _devs;
728  const unsigned int _default_device_id = 0;
730  std::map<unsigned int, unsigned int> _thread2dev_map;
731  int _cpu_device = -1;
732 };
733 
734 } // namespace detail
735 
736 static inline sycl::queue create_queue(bool print_on_async_exceptions = false,
737  bool in_order = true) {
739  print_on_async_exceptions, in_order);
740 }
741 
746 }
747 
755 static inline void set_default_queue(const sycl::queue &q) {
757 }
758 
759 static inline void wait(sycl::queue q = get_default_queue()) { q.wait(); }
760 
761 static inline void wait_and_throw(sycl::queue q = get_default_queue()) {
762  q.wait_and_throw();
763 }
764 
767 static inline unsigned int get_current_device_id() {
769 }
770 
772 static inline device_ext &get_current_device() {
774 }
775 
777 static inline device_ext &get_device(unsigned int id) {
779 }
780 
784  return get_current_device().get_context();
785 }
786 
788 static inline device_ext &cpu_device() {
790 }
791 
792 static inline unsigned int select_device(unsigned int id) {
794  return id;
795 }
796 
797 template <class DeviceSelector>
798 static inline std::enable_if_t<
799  std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
800 select_device(const DeviceSelector &selector = sycl::gpu_selector_v) {
802 }
803 
804 static inline unsigned int get_device_id(const sycl::device &dev) {
806 }
807 
808 } // namespace syclcompat
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
bool is_cpu() const
Check if device is a CPU device.
Definition: device.cpp:79
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Definition: event.hpp:44
A list of asynchronous exceptions.
const char * what() const noexcept final
Definition: exception.cpp:69
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
Defines the iteration domain of either a single work-group in a parallel dispatch, or the overall dimensions of the dispatch.
Definition: range.hpp:26
unsigned int get_device_id(const sycl::device &dev)
Definition: device.hpp:664
unsigned int current_device_id() const
Definition: device.hpp:646
device_ext & cpu_device() const
Definition: device.hpp:633
dev_mgr(dev_mgr &&)=delete
unsigned int device_count()
Definition: device.hpp:662
dev_mgr & operator=(const dev_mgr &)=delete
static dev_mgr & instance()
Returns the instance of device manager singleton.
Definition: device.hpp:688
std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device(const DeviceSelector &selector=sycl::gpu_selector_v)
Select device with a Device Selector.
Definition: device.hpp:681
dev_mgr & operator=(dev_mgr &&)=delete
void select_device(unsigned int id)
Select device with a device ID.
Definition: device.hpp:657
device_ext & current_device()
Definition: device.hpp:628
dev_mgr(const dev_mgr &)=delete
device_ext & get_device(unsigned int id) const
Definition: device.hpp:641
device extension
Definition: device.hpp:265
void destroy_queue(queue_ptr &queue)
Definition: device.hpp:483
bool is_native_host_atomic_supported()
Definition: device.hpp:285
int get_max_compute_units() const
Definition: device.hpp:294
queue_ptr create_queue(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:478
queue_ptr default_queue()
Definition: device.hpp:466
queue_ptr get_saved_queue() const
Definition: device.hpp:497
int get_max_sub_group_size() const
Definition: device.hpp:305
int get_major_version() const
Definition: device.hpp:286
void get_device_info(device_info &out) const
Definition: device.hpp:354
int get_max_clock_frequency() const
Returns the maximum clock frequency of this device in kHz.
Definition: device.hpp:299
int get_minor_version() const
Definition: device.hpp:290
sycl::context get_context() const
Definition: device.hpp:501
void get_memory_info(size_t &free_memory, size_t &total_memory) const
Get the number of bytes of free and total memory on the SYCL device.
Definition: device.hpp:330
void set_saved_queue(queue_ptr q)
Definition: device.hpp:493
size_t get_global_mem_size() const
Definition: device.hpp:321
void set_default_queue(const sycl::queue &q)
Definition: device.hpp:457
int get_max_work_group_size() const
Definition: device.hpp:313
void reset(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:443
int get_mem_base_addr_align() const
Definition: device.hpp:317
int get_max_register_size_per_work_group() const
Definition: device.hpp:309
void has_capability_or_fail(const std::initializer_list< sycl::aspect > &props) const
Util function to check whether a device supports some kinds of sycl::aspect.
Definition: device.hpp:505
device_info get_device_info() const
Definition: device.hpp:437
void queues_wait_and_throw()
Definition: device.hpp:468
device_ext(const sycl::device &base, bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:273
int get_integrated() const
Definition: device.hpp:303
friend sycl::event free_async(const std::vector< void * > &, const std::vector< sycl::event > &, sycl::queue)
Frees the device memory pointed to by a batch of pointers (associated with queue q) after the given events have completed.
Definition: memory.hpp:549
int get_minor_version() const
Definition: device.hpp:118
auto get_max_work_item_sizes() const
Definition: device.hpp:100
void set_local_mem_size(size_t local_mem_size)
Definition: device.hpp:200
int get_integrated() const
Definition: device.hpp:119
size_t get_local_mem_size() const
Definition: device.hpp:151
unsigned int get_memory_bus_width() const
Returns the maximum bus width between device and memory in bits.
Definition: device.hpp:157
unsigned int get_memory_clock_rate() const
Returns the maximum clock rate of device's global memory in kHz.
Definition: device.hpp:154
int get_max_compute_units() const
Definition: device.hpp:121
void set_max_compute_units(int max_compute_units)
Definition: device.hpp:194
void set_device_id(uint32_t device_id)
Definition: device.hpp:229
void set_max_nd_range_size(int max_nd_range_size[])
Definition: device.hpp:213
void set_memory_clock_rate(unsigned int memory_clock_rate)
Definition: device.hpp:219
void set_uuid(std::array< unsigned char, 16 > uuid)
Definition: device.hpp:230
void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit)
Definition: device.hpp:210
void set_major_version(int major)
Definition: device.hpp:190
void set_global_mem_size(size_t global_mem_size)
Definition: device.hpp:197
int get_major_version() const
Definition: device.hpp:117
void set_integrated(int integrated)
Definition: device.hpp:192
unsigned int get_global_mem_cache_size() const
Returns global memory cache size in bytes.
Definition: device.hpp:161
auto get_max_nd_range_size() const
Definition: device.hpp:134
void set_max_work_item_sizes(const sycl::id< 3 > max_work_item_sizes)
Definition: device.hpp:181
void set_max_clock_frequency(int frequency)
Definition: device.hpp:193
std::array< unsigned char, 16 > get_uuid() const
Definition: device.hpp:159
int get_max_register_size_per_work_group() const
Definition: device.hpp:127
const char * get_name() const
Definition: device.hpp:94
void set_host_unified_memory(bool host_unified_memory)
Definition: device.hpp:187
size_t get_global_mem_size() const
Definition: device.hpp:150
int get_max_sub_group_size() const
Definition: device.hpp:123
int get_max_work_group_size() const
Definition: device.hpp:122
void set_max_work_item_sizes(const sycl::range< 3 > max_work_item_sizes)
Definition: device.hpp:175
void set_max_work_group_size(int max_work_group_size)
Definition: device.hpp:203
void set_max_register_size_per_work_group(int max_register_size_per_work_group)
Definition: device.hpp:226
auto get_max_nd_range_size()
Definition: device.hpp:144
int get_max_clock_frequency() const
Definition: device.hpp:120
uint32_t get_device_id() const
Definition: device.hpp:158
int get_max_work_items_per_compute_unit() const
Definition: device.hpp:124
void set_max_sub_group_size(int max_sub_group_size)
Definition: device.hpp:206
auto get_max_work_item_sizes()
Definition: device.hpp:110
void set_minor_version(int minor)
Definition: device.hpp:191
void set_global_mem_cache_size(unsigned int global_mem_cache_size)
Definition: device.hpp:231
bool get_host_unified_memory() const
Definition: device.hpp:116
void set_memory_bus_width(unsigned int memory_bus_width)
Definition: device.hpp:222
void set_name(const char *name)
Definition: device.hpp:166
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
static std::string getAspectNameStr(sycl::aspect AspectNum)
constexpr sub_group_size_key::value_t< Size > sub_group_size
Definition: properties.hpp:124
int gpu_selector_v(const device &dev)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
Definition: access.hpp:18
static unsigned int get_tid()
Definition: device.hpp:615
auto exception_handler
SYCL default exception handler.
Definition: device.hpp:64
static sycl::queue create_queue(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:736
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
Definition: device.hpp:744
sycl::queue * queue_ptr
Definition: device.hpp:82
static void wait(sycl::queue q=get_default_queue())
Definition: device.hpp:759
static device_ext & get_current_device()
Util function to get the current device.
Definition: device.hpp:772
static unsigned int get_current_device_id()
Util function to get the id of current device in device manager.
Definition: device.hpp:767
static device_ext & cpu_device()
Util function to get a CPU device.
Definition: device.hpp:788
static void destroy_event(event_ptr event)
Destroy event pointed memory.
Definition: device.hpp:89
static void set_default_queue(const sycl::queue &q)
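Util function to change the default queue of the current device in the device manager. If the device extension's saved queue is the default queue, the saved queue is changed to the new default queue as well.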
Definition: device.hpp:755
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:161
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:777
static sycl::context get_default_context()
Util function to get the context of the default queue of current device in device manager.
Definition: device.hpp:783
static void wait_and_throw(sycl::queue q=get_default_queue())
Definition: device.hpp:761
char * device_ptr
Definition: device.hpp:84
static unsigned int get_device_id(const sycl::device &dev)
Definition: device.hpp:804
static unsigned int select_device(unsigned int id)
Definition: device.hpp:792