43 #if defined(__linux__)
44 #include <sys/syscall.h>
65 for (std::exception_ptr
const &e : exceptions) {
67 std::rethrow_exception(e);
69 std::cerr <<
"[SYCLcompat] Caught asynchronous SYCL exception:"
71 << e.
what() << std::endl
72 <<
"Exception caught at file:" << __FILE__
73 <<
", line:" << __LINE__ << std::endl;
96 template <
typename WorkItemSizesTy = sycl::range<3>,
97 std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
98 std::is_same_v<WorkItemSizesTy,
int *>,
102 return _max_work_item_sizes;
104 return _max_work_item_sizes_i;
106 template <
typename WorkItemSizesTy = sycl::range<3>,
107 std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
108 std::is_same_v<WorkItemSizesTy,
int *>,
112 return _max_work_item_sizes;
114 return _max_work_item_sizes_i;
125 return _max_work_items_per_compute_unit;
128 return _max_register_size_per_work_group;
130 template <
typename NDRangeSizeTy =
size_t *,
131 std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
132 std::is_same_v<NDRangeSizeTy, int *>,
135 if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
136 return _max_nd_range_size;
138 return _max_nd_range_size_i;
140 template <
typename NDRangeSizeTy =
size_t *,
141 std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
142 std::is_same_v<NDRangeSizeTy, int *>,
145 if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
146 return _max_nd_range_size;
148 return _max_nd_range_size_i;
/// Returns the stored 16-byte UUID (`_uuid`) by value.
159 std::array<unsigned char, 16>
get_uuid()
const {
return _uuid; }
162 return _global_mem_cache_size;
167 size_t length = strlen(name);
168 if (
length < device_info::NAME_BUFFER_SIZE) {
169 std::memcpy(_name, name,
length + 1);
171 std::memcpy(_name, name, device_info::NAME_BUFFER_SIZE - 1);
176 _max_work_item_sizes = max_work_item_sizes;
177 for (
int i = 0; i < 3; ++i)
178 _max_work_item_sizes_i[i] = max_work_item_sizes[i];
182 for (
int i = 0; i < 3; ++i) {
183 _max_work_item_sizes[i] = max_work_item_sizes[i];
184 _max_work_item_sizes_i[i] = max_work_item_sizes[i];
188 _host_unified_memory = host_unified_memory;
195 _max_compute_units = max_compute_units;
198 _global_mem_size = global_mem_size;
201 _local_mem_size = local_mem_size;
204 _max_work_group_size = max_work_group_size;
207 _max_sub_group_size = max_sub_group_size;
211 _max_work_items_per_compute_unit = max_work_items_per_compute_unit;
214 for (
int i = 0; i < 3; i++) {
215 _max_nd_range_size[i] = max_nd_range_size[i];
216 _max_nd_range_size_i[i] = max_nd_range_size[i];
220 _memory_clock_rate = memory_clock_rate;
223 _memory_bus_width = memory_bus_width;
227 _max_register_size_per_work_group = max_register_size_per_work_group;
/// Stores \p uuid (taken by value) in `_uuid`.
/// \param [in] uuid The 16-byte UUID to store.
230 void set_uuid(std::array<unsigned char, 16> uuid) { _uuid = std::move(uuid); }
232 _global_mem_cache_size = global_mem_cache_size;
// Size of the fixed `_name` buffer. set_name() copies the whole string
// (including its terminator) when it is shorter than this, and otherwise
// copies only NAME_BUFFER_SIZE - 1 characters.
236 constexpr
static size_t NAME_BUFFER_SIZE = 256;
238 char _name[device_info::NAME_BUFFER_SIZE];
// `int` copy of the three max work-item sizes; the set_max_work_item_sizes()
// overloads write it element by element alongside `_max_work_item_sizes`.
240 int _max_work_item_sizes_i[3];
241 bool _host_unified_memory =
false;
// Default of 3200000 kHz, used when the compiler cannot query the memory
// clock rate.
247 unsigned int _memory_clock_rate = 3200000;
// Default of 64 bits, used when the compiler cannot query the memory bus
// width.
249 unsigned int _memory_bus_width = 64;
// Global memory cache size in bytes.
// NOTE(review): this and the members below have no in-class initializer;
// confirm that every construction path calls the matching setter before the
// value is read.
250 unsigned int _global_mem_cache_size;
251 int _max_compute_units;
252 int _max_work_group_size;
253 int _max_sub_group_size;
254 int _max_work_items_per_compute_unit;
255 int _max_register_size_per_work_group;
256 size_t _global_mem_size;
257 size_t _local_mem_size;
// Maximum ND-range size per dimension, kept both as `size_t` and as `int`;
// set_max_nd_range_size() fills both arrays from the same input.
258 size_t _max_nd_range_size[3];
259 int _max_nd_range_size_i[3];
// 16-byte UUID read and written through get_uuid() / set_uuid().
261 std::array<unsigned char, 16> _uuid;
269 std::lock_guard<std::mutex> lock(m_mutex);
274 bool in_order =
true)
275 :
sycl::device(base), _ctx(*this) {
276 if (!this->has(sycl::aspect::usm_device_allocations)) {
277 throw std::invalid_argument(
278 "Device does not support device USM allocations");
281 _default_queue =
create_queue(print_on_async_exceptions, in_order);
282 _saved_queue = _default_queue;
318 return get_info<sycl::info::device::mem_base_addr_align>();
331 #if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
332 if (!has(sycl::aspect::ext_intel_free_memory)) {
333 std::cerr <<
"[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
338 free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
341 std::cerr <<
"[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
345 #if defined(_MSC_VER) && !defined(__clang__)
346 #pragma message("Querying the number of bytes of free memory is not supported")
348 #warning "Querying the number of bytes of free memory is not supported"
356 prop.
set_name(get_info<sycl::info::device::name>().c_str());
359 get_version(major, minor);
364 #
if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902)
367 get_info<sycl::info::device::max_work_item_sizes>());
371 get_info<sycl::info::device::max_work_item_sizes<3>>());
375 get_info<sycl::info::device::max_clock_frequency>());
377 get_info<sycl::info::device::max_compute_units>());
379 get_info<sycl::info::device::max_work_group_size>());
383 #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6)
384 if (has(sycl::aspect::ext_intel_memory_clock_rate)) {
386 get_info<sycl::ext::intel::info::device::memory_clock_rate>();
390 if (has(sycl::aspect::ext_intel_memory_bus_width)) {
392 get_info<sycl::ext::intel::info::device::memory_bus_width>());
394 if (has(sycl::aspect::ext_intel_device_id)) {
395 prop.
set_device_id(get_info<sycl::ext::intel::info::device::device_id>());
397 if (has(sycl::aspect::ext_intel_device_info_uuid)) {
398 prop.
set_uuid(get_info<sycl::ext::intel::info::device::uuid>());
400 #elif defined(_MSC_VER) && !defined(__clang__)
401 #pragma message("get_device_info: querying memory_clock_rate and \
402 memory_bus_width are not supported by the compiler used. \
403 Use 3200000 kHz as memory_clock_rate default value. \
404 Use 64 bits as memory_bus_width default value.")
406 #warning "get_device_info: querying memory_clock_rate and \
407 memory_bus_width are not supported by the compiler used. \
408 Use 3200000 kHz as memory_clock_rate default value. \
409 Use 64 bits as memory_bus_width default value."
412 size_t max_sub_group_size = 1;
413 std::vector<size_t> sub_group_sizes =
414 get_info<sycl::info::device::sub_group_sizes>();
424 get_info<sycl::info::device::max_work_group_size>());
425 int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
433 get_info<sycl::info::device::global_mem_cache_size>());
443 void reset(
bool print_on_async_exceptions =
false,
bool in_order =
true) {
444 std::lock_guard<std::mutex> lock(m_mutex);
453 _saved_queue = _default_queue =
454 create_queue_impl(print_on_async_exceptions, in_order);
458 std::lock_guard<std::mutex> lock(m_mutex);
459 _queues.front().get()->wait_and_throw();
460 _queues[0] = std::make_shared<sycl::queue>(q);
461 if (_saved_queue == _default_queue)
462 _saved_queue = _queues.front().get();
463 _default_queue = _queues.front().get();
469 std::unique_lock<std::mutex> lock(m_mutex);
470 std::vector<std::shared_ptr<sycl::queue>> current_queues(_queues);
472 for (
const auto &q : current_queues) {
479 bool in_order =
true) {
480 std::lock_guard<std::mutex> lock(m_mutex);
481 return create_queue_impl(print_on_async_exceptions, in_order);
484 std::lock_guard<std::mutex> lock(m_mutex);
486 std::remove_if(_queues.begin(), _queues.end(),
487 [=](
const std::shared_ptr<sycl::queue> &q) ->
bool {
488 return q.get() == queue;
494 std::lock_guard<std::mutex> lock(m_mutex);
498 std::lock_guard<std::mutex> lock(m_mutex);
506 const std::initializer_list<sycl::aspect> &props)
const {
507 for (
const auto &it : props) {
511 case sycl::aspect::fp64:
513 "[SYCLcompat] 'double' is not supported in '" +
514 get_info<sycl::info::device::name>() +
517 case sycl::aspect::fp16:
519 "[SYCLcompat] 'half' is not supported in '" +
520 get_info<sycl::info::device::name>() +
524 #define __SYCL_ASPECT(ASPECT, ID) \
525 case sycl::aspect::ASPECT: \
527 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
528 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
531 #include <sycl/info/aspects.def>
532 #include <sycl/info/aspects_deprecated.def>
534 return "unknown aspect";
537 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
538 #undef __SYCL_ASPECT_DEPRECATED
542 "' is not supported in '" +
543 get_info<sycl::info::device::name>() +
553 queue_ptr create_queue_impl(
bool print_on_async_exceptions =
false,
554 bool in_order =
true) {
557 prop = {sycl::property::queue::in_order()};
559 #ifdef SYCLCOMPAT_PROFILING_ENABLED
560 prop.push_back(sycl::property::queue::enable_profiling());
562 if (print_on_async_exceptions) {
563 _queues.push_back(std::make_shared<sycl::queue>(
566 _queues.push_back(std::make_shared<sycl::queue>(_ctx, *
this, prop));
568 return _queues.back().get();
571 void get_version(
int &major,
int &minor)
const {
577 ver = get_info<sycl::info::device::version>();
578 std::string::size_type i = 0;
579 while (i < ver.size()) {
584 major = std::stoi(&(ver[i]));
585 while (i < ver.size()) {
590 if (i < ver.size()) {
593 minor = std::stoi(&(ver[i]));
600 std::lock_guard<std::mutex> lock(m_mutex);
601 _events.push_back(event);
// Queues owned by this device. set_default_queue() replaces element 0 and
// points `_default_queue` at it, so the front entry acts as the default queue.
608 std::vector<std::shared_ptr<sycl::queue>> _queues;
// Locked by the queue- and event-management members before they touch the
// containers here. NOTE(review): presumably `mutable` so that const members
// can lock it -- confirm.
609 mutable std::mutex m_mutex;
// Events appended while holding `m_mutex`.
610 std::vector<sycl::event> _events;
616 #if defined(__linux__)
617 return syscall(SYS_gettid);
618 #elif defined(_WIN64)
619 return GetCurrentThreadId();
621 #error "Only support Windows and Linux."
631 return *_devs[dev_id];
634 std::lock_guard<std::mutex> lock(m_mutex);
635 if (_cpu_device == -1) {
636 throw std::runtime_error(
"[SYCLcompat] No valid cpu device");
638 return *_devs[_cpu_device];
642 std::lock_guard<std::mutex> lock(m_mutex);
647 std::lock_guard<std::mutex> lock(m_mutex);
648 auto it = _thread2dev_map.find(
get_tid());
649 if (it != _thread2dev_map.end())
651 return _default_device_id;
658 std::lock_guard<std::mutex> lock(m_mutex);
660 _thread2dev_map[
get_tid()] = id;
666 for (
auto dev_item : _devs) {
667 if (*dev_item == dev) {
678 template <
class DeviceSelector>
680 std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
683 unsigned int selected_device_id =
get_device_id(selected_device);
698 mutable std::mutex m_mutex;
702 _devs.push_back(std::make_shared<device_ext>(default_device));
704 std::vector<sycl::device> sycl_all_devs =
705 sycl::device::get_devices(sycl::info::device_type::all);
707 if (default_device.
is_cpu())
709 for (
auto &dev : sycl_all_devs) {
710 if (dev == default_device) {
713 _devs.push_back(std::make_shared<device_ext>(dev));
714 if (_cpu_device == -1 && dev.is_cpu()) {
715 _cpu_device = _devs.size() - 1;
719 void check_id(
unsigned int id)
const {
720 if (
id >= _devs.size()) {
721 throw std::runtime_error(
"invalid device id");
// All managed devices; a device id is an index into this vector (check_id()
// rejects ids >= _devs.size()).
724 std::vector<std::shared_ptr<device_ext>> _devs;
// Device id returned when the calling thread has no entry in
// `_thread2dev_map`.
728 const unsigned int _default_device_id = 0;
// Maps a thread id (as returned by get_tid()) to the device id selected for
// that thread.
730 std::map<unsigned int, unsigned int> _thread2dev_map;
// Index into `_devs` of the first CPU device found, or -1 when there is none
// (cpu_device() throws in that case).
731 int _cpu_device = -1;
737 bool in_order =
true) {
739 print_on_async_exceptions, in_order);
797 template <
class DeviceSelector>
798 static inline std::enable_if_t<
799 std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
The context class represents a SYCL context on which kernel functions may be executed.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
bool is_cpu() const
Check if device is a CPU device.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
A list of asynchronous exceptions.
const char * what() const noexcept final
A unique identifier of an item in an index space.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Defines the iteration domain of either a single work-group in a parallel dispatch, or the overall dimensions of the dispatch.
unsigned int get_device_id(const sycl::device &dev)
unsigned int current_device_id() const
device_ext & cpu_device() const
dev_mgr(dev_mgr &&)=delete
unsigned int device_count()
dev_mgr & operator=(const dev_mgr &)=delete
static dev_mgr & instance()
Returns the instance of device manager singleton.
std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device(const DeviceSelector &selector=sycl::gpu_selector_v)
Select device with a Device Selector.
dev_mgr & operator=(dev_mgr &&)=delete
void select_device(unsigned int id)
Select device with a device ID.
device_ext & current_device()
dev_mgr(const dev_mgr &)=delete
device_ext & get_device(unsigned int id) const
void destroy_queue(queue_ptr &queue)
bool is_native_host_atomic_supported()
int get_max_compute_units() const
queue_ptr create_queue(bool print_on_async_exceptions=false, bool in_order=true)
queue_ptr default_queue()
queue_ptr get_saved_queue() const
int get_max_sub_group_size() const
int get_major_version() const
void get_device_info(device_info &out) const
int get_max_clock_frequency() const
Return the maximum clock frequency of this device in KHz.
int get_minor_version() const
sycl::context get_context() const
void get_memory_info(size_t &free_memory, size_t &total_memory) const
Get the number of bytes of free and total memory on the SYCL device.
void set_saved_queue(queue_ptr q)
size_t get_global_mem_size() const
void set_default_queue(const sycl::queue &q)
int get_max_work_group_size() const
void reset(bool print_on_async_exceptions=false, bool in_order=true)
int get_mem_base_addr_align() const
int get_max_register_size_per_work_group() const
void has_capability_or_fail(const std::initializer_list< sycl::aspect > &props) const
Util function to check whether a device supports some kinds of sycl::aspect.
device_info get_device_info() const
void queues_wait_and_throw()
device_ext(const sycl::device &base, bool print_on_async_exceptions=false, bool in_order=true)
int get_integrated() const
friend sycl::event free_async(const std::vector< void * > &, const std::vector< sycl::event > &, sycl::queue)
Free the device memory pointed to by a batch of pointers in pointers, which are related to q, after events have completed.
int get_minor_version() const
auto get_max_work_item_sizes() const
void set_local_mem_size(size_t local_mem_size)
int get_integrated() const
size_t get_local_mem_size() const
unsigned int get_memory_bus_width() const
Returns the maximum bus width between device and memory in bits.
unsigned int get_memory_clock_rate() const
Returns the maximum clock rate of device's global memory in kHz.
int get_max_compute_units() const
void set_max_compute_units(int max_compute_units)
void set_device_id(uint32_t device_id)
void set_max_nd_range_size(int max_nd_range_size[])
void set_memory_clock_rate(unsigned int memory_clock_rate)
void set_uuid(std::array< unsigned char, 16 > uuid)
void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit)
void set_major_version(int major)
void set_global_mem_size(size_t global_mem_size)
int get_major_version() const
void set_integrated(int integrated)
unsigned int get_global_mem_cache_size() const
Returns global memory cache size in bytes.
auto get_max_nd_range_size() const
void set_max_work_item_sizes(const sycl::id< 3 > max_work_item_sizes)
void set_max_clock_frequency(int frequency)
std::array< unsigned char, 16 > get_uuid() const
int get_max_register_size_per_work_group() const
const char * get_name() const
void set_host_unified_memory(bool host_unified_memory)
size_t get_global_mem_size() const
int get_max_sub_group_size() const
int get_max_work_group_size() const
void set_max_work_item_sizes(const sycl::range< 3 > max_work_item_sizes)
void set_max_work_group_size(int max_work_group_size)
void set_max_register_size_per_work_group(int max_register_size_per_work_group)
auto get_max_nd_range_size()
int get_max_clock_frequency() const
uint32_t get_device_id() const
int get_max_work_items_per_compute_unit() const
void set_max_sub_group_size(int max_sub_group_size)
auto get_max_work_item_sizes()
void set_minor_version(int minor)
void set_global_mem_cache_size(unsigned int global_mem_cache_size)
bool get_host_unified_memory() const
void set_memory_bus_width(unsigned int memory_bus_width)
void set_name(const char *name)
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
static std::string getAspectNameStr(sycl::aspect AspectNum)
constexpr sub_group_size_key::value_t< Size > sub_group_size
int gpu_selector_v(const device &dev)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static unsigned int get_tid()
auto exception_handler
SYCL default exception handler.
static sycl::queue create_queue(bool print_on_async_exceptions=false, bool in_order=true)
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
static void wait(sycl::queue q=get_default_queue())
static device_ext & get_current_device()
Util function to get the current device.
static unsigned int get_current_device_id()
Util function to get the id of current device in device manager.
static device_ext & cpu_device()
Util function to get a CPU device.
static void destroy_event(event_ptr event)
Destroy event pointed memory.
static void set_default_queue(const sycl::queue &q)
Util function to change the default queue of the current device in the device manager. If the device extension's saved queue is the default queue, the saved queue is replaced as well.
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
static sycl::context get_default_context()
Util function to get the context of the default queue of current device in device manager.
static void wait_and_throw(sycl::queue q=get_default_queue())
static unsigned int get_device_id(const sycl::device &dev)
static unsigned int select_device(unsigned int id)