DPC++ Runtime
Runtime libraries for oneAPI DPC++
device.hpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Copyright (C) Codeplay Software Ltd.
4  *
5  * Part of the LLVM Project, under the Apache License v2.0 with LLVM
6  * Exceptions. See https://llvm.org/LICENSE.txt for license information.
7  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  *
15  * SYCL compatibility extension
16  *
17  * device.hpp
18  *
19  * Description:
20  * Device functionality for the SYCL compatibility extension
21  **************************************************************************/
22 
23 // The original source was under the license below:
24 //==---- device.hpp -------------------------------*- C++ -*----------------==//
25 //
26 // Copyright (C) Intel Corporation
27 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
28 // See https://llvm.org/LICENSE.txt for license information.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #pragma once
33 
#include <algorithm>
#include <array>
#include <cctype>
#include <cstring>
#include <iostream>
#include <map>
#include <mutex>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
43 #if defined(__linux__)
44 #include <sys/syscall.h>
45 #include <unistd.h>
46 #endif
47 #if defined(_WIN64)
48 #ifndef NOMINMAX
49 #define NOMINMAX
50 #endif
51 #include <windows.h>
52 #endif
53 
55 #include <sycl/exception_list.hpp>
57 #include <sycl/queue.hpp>
58 
59 namespace syclcompat {
60 
61 namespace detail {
/// Extracts the major and minor version numbers from a device version string.
///
/// Version string has one of the following formats:
///   a. OpenCL<space><major.minor><space><vendor-specific-information>
///   b. <major.minor>
///   c. <AmdGcnArchName> e.g gfx1030
///
/// \param ver   Version string to parse.
/// \param major Receives the number starting at the first decimal digit, or 0
///              when \p ver has no digit or the number cannot be converted.
/// \param minor Receives the number following the first '.' found at or after
///              the major number, or 0 when there is none or it cannot be
///              converted.
static void parse_version_string(const std::string &ver, int &major,
                                 int &minor) {
  // Converts the text starting at `pos`. Positions past the end and text that
  // std::stoi rejects (no number, or out of int range) yield 0, matching the
  // "nothing found" fallback instead of throwing.
  const auto to_int_or_zero = [&ver](std::string::size_type pos) -> int {
    if (pos >= ver.size())
      return 0;
    try {
      return std::stoi(ver.substr(pos));
    } catch (const std::logic_error &) {
      // std::invalid_argument and std::out_of_range both derive from
      // std::logic_error.
      return 0;
    }
  };

  // std::isdigit requires a value representable as unsigned char; a plain
  // char may be negative for non-ASCII bytes.
  std::string::size_type i = 0;
  while (i < ver.size() && !std::isdigit(static_cast<unsigned char>(ver[i])))
    ++i;
  major = to_int_or_zero(i);

  const std::string::size_type dot = ver.find('.', i);
  minor = (dot == std::string::npos) ? 0 : to_int_or_zero(dot + 1);
}
89 
90 static void get_version(const sycl::device &dev, int &major, int &minor) {
91  std::string ver = dev.get_info<sycl::info::device::version>();
92  parse_version_string(ver, major, minor);
93 }
94 
96 inline auto exception_handler = [](sycl::exception_list exceptions) {
97  for (std::exception_ptr const &e : exceptions) {
98  try {
99  std::rethrow_exception(e);
100  } catch (sycl::exception const &e) {
101  std::cerr << "[SYCLcompat] Caught asynchronous SYCL exception:"
102  << std::endl
103  << e.what() << std::endl
104  << "Exception caught at file:" << __FILE__
105  << ", line:" << __LINE__ << std::endl;
106  }
107  }
108 };
109 
110 } // namespace detail
111 
113 
115 
116 using device_ptr = char *;
117 
121 static void destroy_event(event_ptr event) { delete event; }
122 
123 class device_info {
124 public:
125  // get interface
126  const char *get_name() const { return _name; }
127  char *get_name() { return _name; }
128  template <typename WorkItemSizesTy = sycl::range<3>,
129  std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
130  std::is_same_v<WorkItemSizesTy, int *>,
131  int> = 0>
132  auto get_max_work_item_sizes() const {
133  if constexpr (std::is_same_v<WorkItemSizesTy, sycl::range<3>>)
134  return _max_work_item_sizes;
135  else
136  return _max_work_item_sizes_i;
137  }
138  template <typename WorkItemSizesTy = sycl::range<3>,
139  std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
140  std::is_same_v<WorkItemSizesTy, int *>,
141  int> = 0>
143  if constexpr (std::is_same_v<WorkItemSizesTy, sycl::range<3>>)
144  return _max_work_item_sizes;
145  else
146  return _max_work_item_sizes_i;
147  }
148  bool get_host_unified_memory() const { return _host_unified_memory; }
149  int get_major_version() const { return _major; }
150  int get_minor_version() const { return _minor; }
151  int get_integrated() const { return _integrated; }
152  int get_max_clock_frequency() const { return _frequency; }
153  int get_max_compute_units() const { return _max_compute_units; }
154  int get_max_work_group_size() const { return _max_work_group_size; }
155  int get_max_sub_group_size() const { return _max_sub_group_size; }
157  return _max_work_items_per_compute_unit;
158  }
160  return _max_register_size_per_work_group;
161  }
162  template <typename NDRangeSizeTy = size_t *,
163  std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
164  std::is_same_v<NDRangeSizeTy, int *>,
165  int> = 0>
166  auto get_max_nd_range_size() const {
167  if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
168  return _max_nd_range_size;
169  else
170  return _max_nd_range_size_i;
171  }
172  template <typename NDRangeSizeTy = size_t *,
173  std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
174  std::is_same_v<NDRangeSizeTy, int *>,
175  int> = 0>
177  if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
178  return _max_nd_range_size;
179  else
180  return _max_nd_range_size_i;
181  }
182  size_t get_global_mem_size() const { return _global_mem_size; }
183  size_t get_local_mem_size() const { return _local_mem_size; }
186  unsigned int get_memory_clock_rate() const { return _memory_clock_rate; }
189  unsigned int get_memory_bus_width() const { return _memory_bus_width; }
190  uint32_t get_device_id() const { return _device_id; }
191  std::array<unsigned char, 16> get_uuid() const { return _uuid; }
193  unsigned int get_global_mem_cache_size() const {
194  return _global_mem_cache_size;
195  }
196  int get_image1d_max() const { return _image1d_max; }
197  auto get_image2d_max() const { return _image2d_max; }
198  auto get_image2d_max() { return _image2d_max; }
199  auto get_image3d_max() const { return _image3d_max; }
200  auto get_image3d_max() { return _image3d_max; }
201 
202  // set interface
203  void set_name(const char *name) {
204  size_t length = strlen(name);
205  if (length < device_info::NAME_BUFFER_SIZE) {
206  std::memcpy(_name, name, length + 1);
207  } else {
208  std::memcpy(_name, name, device_info::NAME_BUFFER_SIZE - 1);
209  _name[255] = '\0';
210  }
211  }
212  void set_max_work_item_sizes(const sycl::range<3> max_work_item_sizes) {
213  _max_work_item_sizes = max_work_item_sizes;
214  for (int i = 0; i < 3; ++i)
215  _max_work_item_sizes_i[i] = max_work_item_sizes[i];
216  }
217  [[deprecated]] void
218  set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes) {
219  for (int i = 0; i < 3; ++i) {
220  _max_work_item_sizes[i] = max_work_item_sizes[i];
221  _max_work_item_sizes_i[i] = max_work_item_sizes[i];
222  }
223  }
224  void set_host_unified_memory(bool host_unified_memory) {
225  _host_unified_memory = host_unified_memory;
226  }
227  void set_major_version(int major) { _major = major; }
228  void set_minor_version(int minor) { _minor = minor; }
229  void set_integrated(int integrated) { _integrated = integrated; }
230  void set_max_clock_frequency(int frequency) { _frequency = frequency; }
231  void set_max_compute_units(int max_compute_units) {
232  _max_compute_units = max_compute_units;
233  }
234  void set_global_mem_size(size_t global_mem_size) {
235  _global_mem_size = global_mem_size;
236  }
237  void set_local_mem_size(size_t local_mem_size) {
238  _local_mem_size = local_mem_size;
239  }
240  void set_max_work_group_size(int max_work_group_size) {
241  _max_work_group_size = max_work_group_size;
242  }
243  void set_max_sub_group_size(int max_sub_group_size) {
244  _max_sub_group_size = max_sub_group_size;
245  }
246  void
247  set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit) {
248  _max_work_items_per_compute_unit = max_work_items_per_compute_unit;
249  }
250  void set_max_nd_range_size(int max_nd_range_size[]) {
251  for (int i = 0; i < 3; i++) {
252  _max_nd_range_size[i] = max_nd_range_size[i];
253  _max_nd_range_size_i[i] = max_nd_range_size[i];
254  }
255  }
256  void set_max_nd_range_size(sycl::id<3> max_nd_range_size) {
257  for (int i = 0; i < 3; i++) {
258  _max_nd_range_size[i] = max_nd_range_size[i];
259  _max_nd_range_size_i[i] = max_nd_range_size[i];
260  }
261  }
262  void set_memory_clock_rate(unsigned int memory_clock_rate) {
263  _memory_clock_rate = memory_clock_rate;
264  }
265  void set_memory_bus_width(unsigned int memory_bus_width) {
266  _memory_bus_width = memory_bus_width;
267  }
268  void
269  set_max_register_size_per_work_group(int max_register_size_per_work_group) {
270  _max_register_size_per_work_group = max_register_size_per_work_group;
271  }
272  void set_device_id(uint32_t device_id) { _device_id = device_id; }
273  void set_uuid(std::array<unsigned char, 16> uuid) { _uuid = std::move(uuid); }
274  void set_global_mem_cache_size(unsigned int global_mem_cache_size) {
275  _global_mem_cache_size = global_mem_cache_size;
276  }
277  void set_image1d_max(size_t image_max_buffer_size) {
278  _image1d_max = image_max_buffer_size;
279  }
280  void set_image2d_max(size_t image_max_width_buffer_size,
281  size_t image_max_height_buffer_size) {
282  _image2d_max[0] = image_max_width_buffer_size;
283  _image2d_max[1] = image_max_height_buffer_size;
284  }
285  void set_image3d_max(size_t image_max_width_buffer_size,
286  size_t image_max_height_buffer_size,
287  size_t image_max_depth_buffer_size) {
288  _image3d_max[0] = image_max_width_buffer_size;
289  _image3d_max[1] = image_max_height_buffer_size;
290  _image3d_max[2] = image_max_depth_buffer_size;
291  }
292 
293 private:
294  constexpr static size_t NAME_BUFFER_SIZE = 256;
295 
296  char _name[device_info::NAME_BUFFER_SIZE];
297  sycl::range<3> _max_work_item_sizes;
298  int _max_work_item_sizes_i[3];
299  bool _host_unified_memory = false;
300  int _major;
301  int _minor;
302  int _integrated = 0;
303  int _frequency;
304  // Set estimated value 3200000 kHz as default value.
305  unsigned int _memory_clock_rate = 3200000;
306  // Set estimated value 64 bits as default value.
307  unsigned int _memory_bus_width = 64;
308  unsigned int _global_mem_cache_size;
309  int _max_compute_units;
310  int _max_work_group_size;
311  int _max_sub_group_size;
312  int _max_work_items_per_compute_unit;
313  int _max_register_size_per_work_group;
314  size_t _global_mem_size;
315  size_t _local_mem_size;
316  size_t _max_nd_range_size[3];
317  int _max_nd_range_size_i[3];
318  uint32_t _device_id;
319  std::array<unsigned char, 16> _uuid;
320  int _image1d_max;
321  int _image2d_max[2];
322  int _image3d_max[3];
323 };
324 
325 static int get_major_version(const sycl::device &dev) {
326  int major, minor;
327  detail::get_version(dev, major, minor);
328  return major;
329 }
330 
331 static int get_minor_version(const sycl::device &dev) {
332  int major, minor;
333  detail::get_version(dev, major, minor);
334  return minor;
335 }
336 
338 class device_ext : public sycl::device {
339 public:
340  device_ext() : sycl::device(), _ctx(*this) {}
342  try {
343  std::lock_guard<std::mutex> lock(m_mutex);
344  sycl::event::wait(_events);
345  _queues.clear();
346  } catch (std::exception &e) {
347  __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_ext", e);
348  }
349  }
350  device_ext(const sycl::device &base, bool print_on_async_exceptions = false,
351  bool in_order = true)
352  : sycl::device(base), _ctx(*this) {
353  if (!this->has(sycl::aspect::usm_device_allocations)) {
354  throw std::invalid_argument(
355  "Device does not support device USM allocations");
356  }
357  // calls create_queue since we don't have a locked m_mutex
358  _default_queue = create_queue(print_on_async_exceptions, in_order);
359  _saved_queue = _default_queue;
360  }
361 
/// Always reports that native host atomics are not supported.
bool is_native_host_atomic_supported() {
  return false;
}
363  int get_major_version() const { return syclcompat::get_major_version(*this); }
364 
365  int get_minor_version() const { return syclcompat::get_minor_version(*this); }
366 
367  int get_max_compute_units() const {
369  }
370 
374  }
375 
376  int get_integrated() const { return get_device_info().get_integrated(); }
377 
380  }
381 
384  }
385 
388  }
389 
391  return get_info<sycl::info::device::mem_base_addr_align>();
392  }
393 
394  size_t get_global_mem_size() const {
396  }
397 
403  void get_memory_info(size_t &free_memory, size_t &total_memory) const {
404 #if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
405  if (!has(sycl::aspect::ext_intel_free_memory)) {
406  std::cerr << "[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
407  "supported."
408  << std::endl;
409  free_memory = 0;
410  } else {
411  free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
412  }
413 #else
414  std::cerr << "[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
415  "supported."
416  << std::endl;
417  free_memory = 0;
418 #if defined(_MSC_VER) && !defined(__clang__)
419 #pragma message("Querying the number of bytes of free memory is not supported")
420 #else
421 #warning "Querying the number of bytes of free memory is not supported"
422 #endif
423 #endif
424  total_memory = get_device_info().get_global_mem_size();
425  }
426 
427  void get_device_info(device_info &out) const {
428  device_info prop;
429  prop.set_name(get_info<sycl::info::device::name>().c_str());
430 
431  int major, minor;
432  get_version(major, minor);
433  prop.set_major_version(major);
434  prop.set_minor_version(minor);
435 
437 #if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902)
438  // oneAPI DPC++ compiler older than 2022/09/02, where
439  // max_work_item_sizes is an enum class element
440  get_info<sycl::info::device::max_work_item_sizes>());
441 #else
442  // SYCL 2020-conformant code, max_work_item_sizes is a struct templated
443  // by an int
444  get_info<sycl::info::device::max_work_item_sizes<3>>());
445 #endif
446  prop.set_host_unified_memory(has(sycl::aspect::usm_host_allocations));
447 
449  get_info<sycl::info::device::max_clock_frequency>());
451  get_info<sycl::info::device::max_compute_units>());
453  get_info<sycl::info::device::max_work_group_size>());
454  prop.set_global_mem_size(get_info<sycl::info::device::global_mem_size>());
455  prop.set_local_mem_size(get_info<sycl::info::device::local_mem_size>());
456 
457 #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6)
458  if (has(sycl::aspect::ext_intel_memory_clock_rate)) {
459  unsigned int tmp =
460  get_info<sycl::ext::intel::info::device::memory_clock_rate>();
461  if (tmp != 0)
462  prop.set_memory_clock_rate(1000 * tmp);
463  }
464  if (has(sycl::aspect::ext_intel_memory_bus_width)) {
466  get_info<sycl::ext::intel::info::device::memory_bus_width>());
467  }
468  if (has(sycl::aspect::ext_intel_device_id)) {
469  prop.set_device_id(get_info<sycl::ext::intel::info::device::device_id>());
470  }
471  if (has(sycl::aspect::ext_intel_device_info_uuid)) {
472  prop.set_uuid(get_info<sycl::ext::intel::info::device::uuid>());
473  }
474 #elif defined(_MSC_VER) && !defined(__clang__)
475 #pragma message("get_device_info: querying memory_clock_rate and \
476 memory_bus_width are not supported by the compiler used. \
477 Use 3200000 kHz as memory_clock_rate default value. \
478 Use 64 bits as memory_bus_width default value.")
479 #else
480 #warning "get_device_info: querying memory_clock_rate and \
481 memory_bus_width are not supported by the compiler used. \
482 Use 3200000 kHz as memory_clock_rate default value. \
483 Use 64 bits as memory_bus_width default value."
484 #endif
485 
486  size_t max_sub_group_size = 1;
487  std::vector<size_t> sub_group_sizes =
488  get_info<sycl::info::device::sub_group_sizes>();
489 
490  for (const auto &sub_group_size : sub_group_sizes) {
491  if (max_sub_group_size < sub_group_size)
492  max_sub_group_size = sub_group_size;
493  }
494 
495  prop.set_max_sub_group_size(max_sub_group_size);
496 
498  get_info<sycl::info::device::max_work_group_size>());
499 #ifdef SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY
502  3>>());
503 #else
504 #if defined(_MSC_VER) && !defined(__clang__)
505 #pragma message("get_device_info: querying the maximum number \
506  of work groups is not supported.")
507 #else
508 #warning "get_device_info: querying the maximum number of \
509  work groups is not supported."
510 #endif
511  int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
512  prop.set_max_nd_range_size(max_nd_range_size);
513 #endif
514 
515  // Estimates max register size per work group, feel free to update the value
516  // according to device properties.
518 
520  get_info<sycl::info::device::global_mem_cache_size>());
521 
522  prop.set_image1d_max(get_info<sycl::info::device::image_max_buffer_size>());
523  prop.set_image1d_max(get_info<sycl::info::device::image_max_buffer_size>());
524  prop.set_image2d_max(get_info<sycl::info::device::image2d_max_width>(),
525  get_info<sycl::info::device::image2d_max_height>());
526  prop.set_image3d_max(get_info<sycl::info::device::image3d_max_width>(),
527  get_info<sycl::info::device::image3d_max_height>(),
528  get_info<sycl::info::device::image3d_max_height>());
529  out = prop;
530  }
531 
533  device_info prop;
534  get_device_info(prop);
535  return prop;
536  }
537 
538  void reset(bool print_on_async_exceptions = false, bool in_order = true) {
539  std::lock_guard<std::mutex> lock(m_mutex);
540  // The queues are shared_ptrs and the ref counts of the shared_ptrs increase
541  // only in wait_and_throw(). If there is no other thread calling
542  // wait_and_throw(), the queues will be destructed. The destructor waits for
543  // all commands executing on the queue to complete. It isn't possible to
544  // destroy a queue immediately. This is a synchronization point in SYCL.
545  _queues.clear();
546  // create new default queue
547  // calls create_queue_impl since we already have a locked m_mutex
548 
549  _saved_queue = _default_queue =
550  in_order ? create_queue_impl(print_on_async_exceptions,
551  sycl::property::queue::in_order())
552  : create_queue_impl(print_on_async_exceptions);
553  }
554 
556  std::lock_guard<std::mutex> lock(m_mutex);
557  _queues.front().get()->wait_and_throw();
558  _queues[0] = std::make_shared<sycl::queue>(q);
559  if (_saved_queue == _default_queue)
560  _saved_queue = _queues.front().get();
561  _default_queue = _queues.front().get();
562  }
563 
564  queue_ptr default_queue() { return _default_queue; }
565 
567  std::unique_lock<std::mutex> lock(m_mutex);
568  std::vector<std::shared_ptr<sycl::queue>> current_queues(_queues);
569  lock.unlock();
570  for (const auto &q : current_queues) {
571  q->wait_and_throw();
572  }
573  // Guard the destruct of current_queues to make sure the ref count is safe.
574  lock.lock();
575  }
576  queue_ptr create_queue(bool print_on_async_exceptions = false,
577  bool in_order = true) {
578  std::lock_guard<std::mutex> lock(m_mutex);
579  return in_order ? create_queue_impl(print_on_async_exceptions,
580  sycl::property::queue::in_order())
581  : create_queue_impl(print_on_async_exceptions);
582  }
583  void destroy_queue(queue_ptr &queue) {
584  std::lock_guard<std::mutex> lock(m_mutex);
585  _queues.erase(
586  std::remove_if(_queues.begin(), _queues.end(),
587  [=](const std::shared_ptr<sycl::queue> &q) -> bool {
588  return q.get() == queue;
589  }),
590  _queues.end());
591  queue = nullptr;
592  }
594  std::lock_guard<std::mutex> lock(m_mutex);
595  _saved_queue = q;
596  }
598  std::lock_guard<std::mutex> lock(m_mutex);
599  return _saved_queue;
600  }
601  sycl::context get_context() const { return _ctx; }
602 
606  const std::initializer_list<sycl::aspect> &props) const {
607  for (const auto &it : props) {
608  if (has(it))
609  continue;
610  switch (it) {
611  case sycl::aspect::fp64:
612  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
613  "[SYCLcompat] 'double' is not supported in '" +
614  get_info<sycl::info::device::name>() +
615  "' device");
616  break;
617  case sycl::aspect::fp16:
618  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
619  "[SYCLcompat] 'half' is not supported in '" +
620  get_info<sycl::info::device::name>() +
621  "' device");
622  break;
623  default:
624 #define __SYCL_ASPECT(ASPECT, ID) \
625  case sycl::aspect::ASPECT: \
626  return #ASPECT;
627 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
628 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
629  auto getAspectNameStr = [](sycl::aspect AspectNum) -> std::string {
630  switch (AspectNum) {
631 #include <sycl/info/aspects.def>
632 #include <sycl/info/aspects_deprecated.def>
633  default:
634  return "unknown aspect";
635  }
636  };
637 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
638 #undef __SYCL_ASPECT_DEPRECATED
639 #undef __SYCL_ASPECT
640  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
641  "[SYCLcompat] '" + getAspectNameStr(it) +
642  "' is not supported in '" +
643  get_info<sycl::info::device::name>() +
644  "' device");
645  }
646  break;
647  }
648  }
649 
650 private:
653  template <typename... PropertiesT>
654  queue_ptr create_queue_impl(bool print_on_async_exceptions = false,
655  PropertiesT... properties) {
657 #ifdef SYCLCOMPAT_PROFILING_ENABLED
658  sycl::property::queue::enable_profiling(),
659 #endif
660  properties...);
661  if (print_on_async_exceptions) {
662  _queues.push_back(std::make_shared<sycl::queue>(
663  _ctx, *this, detail::exception_handler, prop));
664  } else {
665  _queues.push_back(std::make_shared<sycl::queue>(_ctx, *this, prop));
666  }
667  return _queues.back().get();
668  }
669 
670  void get_version(int &major, int &minor) const {
671  detail::get_version(*this, major, minor);
672  }
673  void add_event(sycl::event event) {
674  std::lock_guard<std::mutex> lock(m_mutex);
675  _events.push_back(event);
676  }
677  friend sycl::event enqueue_free(const std::vector<void *> &,
678  const std::vector<sycl::event> &,
679  sycl::queue);
680  queue_ptr _default_queue;
681  queue_ptr _saved_queue;
682  sycl::context _ctx;
683  std::vector<std::shared_ptr<sycl::queue>> _queues;
684  mutable std::mutex m_mutex;
685  std::vector<sycl::event> _events;
686 };
687 
688 namespace detail {
689 
/// Returns the identifier of the calling thread: the result of the gettid
/// system call on Linux, GetCurrentThreadId() on 64-bit Windows. Other
/// platforms are rejected at compile time.
static inline unsigned int get_tid() {
#if defined(__linux__)
  const auto kernel_tid = syscall(SYS_gettid);
  return static_cast<unsigned int>(kernel_tid);
#elif defined(_WIN64)
  const auto windows_tid = GetCurrentThreadId();
  return static_cast<unsigned int>(windows_tid);
#else
#error "Only support Windows and Linux."
#endif
}
699 
701 class dev_mgr {
702 public:
704  unsigned int dev_id = current_device_id();
705  check_id(dev_id);
706  return *_devs[dev_id];
707  }
709  std::lock_guard<std::mutex> lock(m_mutex);
710  if (_cpu_device == -1) {
711  throw std::runtime_error("[SYCLcompat] No valid cpu device");
712  } else {
713  return *_devs[_cpu_device];
714  }
715  }
716  device_ext &get_device(unsigned int id) const {
717  std::lock_guard<std::mutex> lock(m_mutex);
718  check_id(id);
719  return *_devs[id];
720  }
721  unsigned int current_device_id() const {
722  std::lock_guard<std::mutex> lock(m_mutex);
723  auto it = _thread2dev_map.find(get_tid());
724  if (it != _thread2dev_map.end())
725  return it->second;
726  return _default_device_id;
727  }
728 
732  void select_device(unsigned int id) {
733  std::lock_guard<std::mutex> lock(m_mutex);
734  check_id(id);
735  _thread2dev_map[get_tid()] = id;
736  }
737  unsigned int device_count() { return _devs.size(); }
738 
739  unsigned int get_device_id(const sycl::device &dev) {
740  if (!_devs.size()) {
741  throw std::runtime_error(
742  "[SYCLcompat] No SYCL devices found in the device list. Device list "
743  "may have been filtered by syclcompat::filter_device");
744  }
745  unsigned int id = 0;
746  for (auto dev_item : _devs) {
747  if (*dev_item == dev) {
748  return id;
749  }
750  id++;
751  }
752  throw std::runtime_error("[SYCLcompat] The device[" +
753  dev.get_info<sycl::info::device::name>() +
754  "] is filtered out by syclcompat::filter_device "
755  "in current device list!");
756  }
757 
759  void list_devices() const {
760  for (size_t i = 0; i < _devs.size(); ++i) {
761  std::cout << "Device " << i << ": "
762  << _devs[i]->get_info<sycl::info::device::name>() << std::endl;
763  }
764  }
765 
770  void filter(const std::vector<std::string> &dev_subnames) {
771  std::lock_guard<std::mutex> lock(m_mutex);
772  auto iter = _devs.begin();
773  while (iter != _devs.end()) {
774  std::string dev_name = (*iter)->get_info<sycl::info::device::name>();
775  bool matched = false;
776  for (const auto &name : dev_subnames) {
777  if (dev_name.find(name) != std::string::npos) {
778  matched = true;
779  break;
780  }
781  }
782  if (matched)
783  ++iter;
784  else
785  iter = _devs.erase(iter);
786  }
787  _cpu_device = -1;
788  for (unsigned i = 0; i < _devs.size(); ++i) {
789  if (_devs[i]->is_cpu()) {
790  _cpu_device = i;
791  break;
792  }
793  }
794  _thread2dev_map.clear();
795 #ifdef SYCLCOMPAT_VERBOSE
796  list_devices();
797 #endif
798  }
799 
803  template <class DeviceSelector>
804  std::enable_if_t<
805  std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
806  select_device(const DeviceSelector &selector = sycl::gpu_selector_v) {
807  sycl::device selected_device = sycl::device(selector);
808  unsigned int selected_device_id = get_device_id(selected_device);
809  select_device(selected_device_id);
810  }
811 
813  static dev_mgr &instance() {
814  static dev_mgr d_m;
815  return d_m;
816  }
817  dev_mgr(const dev_mgr &) = delete;
818  dev_mgr &operator=(const dev_mgr &) = delete;
819  dev_mgr(dev_mgr &&) = delete;
820  dev_mgr &operator=(dev_mgr &&) = delete;
821 
822 private:
823  mutable std::mutex m_mutex;
824 
825  dev_mgr() {
827  _devs.push_back(std::make_shared<device_ext>(default_device));
828 
829  std::vector<sycl::device> sycl_all_devs =
830  sycl::device::get_devices(sycl::info::device_type::all);
831  // Collect other devices except for the default device.
832  if (default_device.is_cpu())
833  _cpu_device = 0;
834  for (auto &dev : sycl_all_devs) {
835  if (dev == default_device) {
836  continue;
837  }
838  _devs.push_back(std::make_shared<device_ext>(dev));
839  if (_cpu_device == -1 && dev.is_cpu()) {
840  _cpu_device = _devs.size() - 1;
841  }
842  }
843 #ifdef SYCLCOMPAT_VERBOSE
844  list_devices();
845 #endif
846  }
847  void check_id(unsigned int id) const {
848  if (id >= _devs.size()) {
849  throw std::runtime_error("invalid device id");
850  }
851  }
852  std::vector<std::shared_ptr<device_ext>> _devs;
856  const unsigned int _default_device_id = 0;
858  std::map<unsigned int, unsigned int> _thread2dev_map;
859  int _cpu_device = -1;
860 };
861 
862 } // namespace detail
863 
864 static inline sycl::queue create_queue(bool print_on_async_exceptions = false,
865  bool in_order = true) {
867  print_on_async_exceptions, in_order);
868 }
869 
874 }
875 
883 static inline void set_default_queue(const sycl::queue &q) {
885 }
886 
887 static inline void wait(sycl::queue q = get_default_queue()) { q.wait(); }
888 
889 static inline void wait_and_throw(sycl::queue q = get_default_queue()) {
890  q.wait_and_throw();
891 }
892 
895 static inline unsigned int get_current_device_id() {
897 }
898 
900 static inline device_ext &get_current_device() {
902 }
903 
905 static inline device_ext &get_device(unsigned int id) {
907 }
908 
912  return get_current_device().get_context();
913 }
914 
916 static inline device_ext &cpu_device() {
918 }
919 
924 static inline void filter_device(const std::vector<std::string> &dev_subnames) {
925  detail::dev_mgr::instance().filter(dev_subnames);
926 }
927 
929 static inline void list_devices() {
931 }
932 
933 static inline unsigned int select_device(unsigned int id) {
935  return id;
936 }
937 
938 template <class DeviceSelector>
939 static inline std::enable_if_t<
940  std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
941 select_device(const DeviceSelector &selector = sycl::gpu_selector_v) {
943 }
944 
945 static inline unsigned int get_device_id(const sycl::device &dev) {
947 }
948 
949 static inline unsigned int device_count() {
951 }
952 } // namespace syclcompat
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Definition: device.hpp:215
bool is_cpu() const
Checks whether this device is a CPU device.
Definition: device.cpp:75
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Definition: event.hpp:44
A list of asynchronous exceptions.
const char * what() const noexcept final
Definition: exception.cpp:49
A unique identifier of an item in an index space.
Definition: id.hpp:36
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
unsigned int get_device_id(const sycl::device &dev)
Definition: device.hpp:739
unsigned int current_device_id() const
Definition: device.hpp:721
void filter(const std::vector< std::string > &dev_subnames)
Filters the device list, keeping only the devices whose names contain at least one of the substrings in dev_subnames.
Definition: device.hpp:770
device_ext & cpu_device() const
Definition: device.hpp:708
dev_mgr(dev_mgr &&)=delete
unsigned int device_count()
Definition: device.hpp:737
dev_mgr & operator=(const dev_mgr &)=delete
static dev_mgr & instance()
Returns the instance of device manager singleton.
Definition: device.hpp:813
void list_devices() const
List all the devices with its id in dev_mgr.
Definition: device.hpp:759
std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device(const DeviceSelector &selector=sycl::gpu_selector_v)
Select device with a Device Selector.
Definition: device.hpp:806
dev_mgr & operator=(dev_mgr &&)=delete
void select_device(unsigned int id)
Select device with a device ID.
Definition: device.hpp:732
device_ext & current_device()
Definition: device.hpp:703
dev_mgr(const dev_mgr &)=delete
device_ext & get_device(unsigned int id) const
Definition: device.hpp:716
device extension
Definition: device.hpp:338
void destroy_queue(queue_ptr &queue)
Definition: device.hpp:583
bool is_native_host_atomic_supported()
Definition: device.hpp:362
int get_max_compute_units() const
Definition: device.hpp:367
queue_ptr create_queue(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:576
queue_ptr default_queue()
Definition: device.hpp:564
friend sycl::event enqueue_free(const std::vector< void * > &, const std::vector< sycl::event > &, sycl::queue)
Enqueues the release of all pointers in \p pointers on the queue \p q.
Definition: memory.hpp:647
queue_ptr get_saved_queue() const
Definition: device.hpp:597
int get_max_sub_group_size() const
Definition: device.hpp:378
int get_major_version() const
Definition: device.hpp:363
void get_device_info(device_info &out) const
Definition: device.hpp:427
int get_max_clock_frequency() const
Return the maximum clock frequency of this device in KHz.
Definition: device.hpp:372
int get_minor_version() const
Definition: device.hpp:365
sycl::context get_context() const
Definition: device.hpp:601
void get_memory_info(size_t &free_memory, size_t &total_memory) const
Get the number of bytes of free and total memory on the SYCL device.
Definition: device.hpp:403
void set_saved_queue(queue_ptr q)
Definition: device.hpp:593
size_t get_global_mem_size() const
Definition: device.hpp:394
void set_default_queue(const sycl::queue &q)
Definition: device.hpp:555
int get_max_work_group_size() const
Definition: device.hpp:386
void reset(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:538
int get_mem_base_addr_align() const
Definition: device.hpp:390
int get_max_register_size_per_work_group() const
Definition: device.hpp:382
void has_capability_or_fail(const std::initializer_list< sycl::aspect > &props) const
Util function to check whether a device supports some kinds of sycl::aspect.
Definition: device.hpp:605
device_info get_device_info() const
Definition: device.hpp:532
void queues_wait_and_throw()
Definition: device.hpp:566
device_ext(const sycl::device &base, bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:350
int get_integrated() const
Definition: device.hpp:376
void set_image3d_max(size_t image_max_width_buffer_size, size_t image_max_height_buffer_size, size_t image_max_depth_buffer_size)
Definition: device.hpp:285
int get_minor_version() const
Definition: device.hpp:150
auto get_max_work_item_sizes() const
Definition: device.hpp:132
void set_local_mem_size(size_t local_mem_size)
Definition: device.hpp:237
int get_integrated() const
Definition: device.hpp:151
size_t get_local_mem_size() const
Definition: device.hpp:183
unsigned int get_memory_bus_width() const
Returns the maximum bus width between device and memory in bits.
Definition: device.hpp:189
unsigned int get_memory_clock_rate() const
Returns the maximum clock rate of device's global memory in kHz.
Definition: device.hpp:186
int get_max_compute_units() const
Definition: device.hpp:153
void set_max_compute_units(int max_compute_units)
Definition: device.hpp:231
void set_device_id(uint32_t device_id)
Definition: device.hpp:272
auto get_image3d_max() const
Definition: device.hpp:199
void set_max_nd_range_size(int max_nd_range_size[])
Definition: device.hpp:250
void set_memory_clock_rate(unsigned int memory_clock_rate)
Definition: device.hpp:262
void set_image2d_max(size_t image_max_width_buffer_size, size_t image_max_height_buffer_size)
Definition: device.hpp:280
void set_uuid(std::array< unsigned char, 16 > uuid)
Definition: device.hpp:273
void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit)
Definition: device.hpp:247
void set_major_version(int major)
Definition: device.hpp:227
void set_global_mem_size(size_t global_mem_size)
Definition: device.hpp:234
void set_max_nd_range_size(sycl::id< 3 > max_nd_range_size)
Definition: device.hpp:256
int get_major_version() const
Definition: device.hpp:149
void set_integrated(int integrated)
Definition: device.hpp:229
unsigned int get_global_mem_cache_size() const
Returns global memory cache size in bytes.
Definition: device.hpp:193
auto get_max_nd_range_size() const
Definition: device.hpp:166
void set_max_work_item_sizes(const sycl::id< 3 > max_work_item_sizes)
Definition: device.hpp:218
void set_max_clock_frequency(int frequency)
Definition: device.hpp:230
int get_image1d_max() const
Definition: device.hpp:196
auto get_image2d_max() const
Definition: device.hpp:197
std::array< unsigned char, 16 > get_uuid() const
Definition: device.hpp:191
int get_max_register_size_per_work_group() const
Definition: device.hpp:159
const char * get_name() const
Definition: device.hpp:126
void set_host_unified_memory(bool host_unified_memory)
Definition: device.hpp:224
size_t get_global_mem_size() const
Definition: device.hpp:182
int get_max_sub_group_size() const
Definition: device.hpp:155
int get_max_work_group_size() const
Definition: device.hpp:154
void set_max_work_item_sizes(const sycl::range< 3 > max_work_item_sizes)
Definition: device.hpp:212
void set_max_work_group_size(int max_work_group_size)
Definition: device.hpp:240
void set_max_register_size_per_work_group(int max_register_size_per_work_group)
Definition: device.hpp:269
auto get_max_nd_range_size()
Definition: device.hpp:176
int get_max_clock_frequency() const
Definition: device.hpp:152
uint32_t get_device_id() const
Definition: device.hpp:190
int get_max_work_items_per_compute_unit() const
Definition: device.hpp:156
void set_max_sub_group_size(int max_sub_group_size)
Definition: device.hpp:243
auto get_max_work_item_sizes()
Definition: device.hpp:142
void set_minor_version(int minor)
Definition: device.hpp:228
void set_global_mem_cache_size(unsigned int global_mem_cache_size)
Definition: device.hpp:274
void set_image1d_max(size_t image_max_buffer_size)
Definition: device.hpp:277
bool get_host_unified_memory() const
Definition: device.hpp:148
void set_memory_bus_width(unsigned int memory_bus_width)
Definition: device.hpp:265
void set_name(const char *name)
Definition: device.hpp:203
#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e)
Definition: common.hpp:367
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
static std::string getAspectNameStr(sycl::aspect AspectNum)
constexpr sub_group_size_key::value_t< Size > sub_group_size
Definition: properties.hpp:149
int default_selector_v(const device &dev)
int gpu_selector_v(const device &dev)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
Definition: exception.cpp:65
Definition: access.hpp:18
static unsigned int get_tid()
Definition: device.hpp:690
static void parse_version_string(const std::string &ver, int &major, int &minor)
Definition: device.hpp:62
auto exception_handler
SYCL default exception handler.
Definition: device.hpp:96
static void get_version(const sycl::device &dev, int &major, int &minor)
Definition: device.hpp:90
static sycl::queue create_queue(bool print_on_async_exceptions=false, bool in_order=true)
Definition: device.hpp:864
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
Definition: device.hpp:872
sycl::queue * queue_ptr
Definition: device.hpp:114
static void wait(sycl::queue q=get_default_queue())
Definition: device.hpp:887
static device_ext & get_current_device()
Util function to get the current device.
Definition: device.hpp:900
static int get_minor_version(const sycl::device &dev)
Definition: device.hpp:331
static unsigned int get_current_device_id()
Util function to get the id of current device in device manager.
Definition: device.hpp:895
static unsigned int device_count()
Definition: device.hpp:949
static device_ext & cpu_device()
Util function to get a CPU device.
Definition: device.hpp:916
static void destroy_event(event_ptr event)
Destroy event pointed memory.
Definition: device.hpp:121
static void set_default_queue(const sycl::queue &q)
Util function to change the default queue of the current device in the device manager. If the device e...
Definition: device.hpp:883
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:436
static int get_major_version(const sycl::device &dev)
Definition: device.hpp:325
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:905
static sycl::context get_default_context()
Util function to get the context of the default queue of current device in device manager.
Definition: device.hpp:911
static void wait_and_throw(sycl::queue q=get_default_queue())
Definition: device.hpp:889
static void filter_device(const std::vector< std::string > &dev_subnames)
Filter out devices; only keep the devices whose name contains one of the subnames in dev_subnames.
Definition: device.hpp:924
static void list_devices()
List all the devices with their ids in dev_mgr.
Definition: device.hpp:929
char * device_ptr
Definition: device.hpp:116
static unsigned int get_device_id(const sycl::device &dev)
Definition: device.hpp:945
static unsigned int select_device(unsigned int id)
Definition: device.hpp:933