43 #if defined(__linux__)
44 #include <sys/syscall.h>
68 std::string::size_type i = 0;
69 while (i < ver.size()) {
75 major = std::stoi(&(ver[i]));
78 while (i < ver.size()) {
85 minor = std::stoi(&(ver[i]));
91 std::string ver = dev.
get_info<sycl::info::device::version>();
97 for (std::exception_ptr
const &e : exceptions) {
99 std::rethrow_exception(e);
101 std::cerr <<
"[SYCLcompat] Caught asynchronous SYCL exception:"
103 << e.
what() << std::endl
104 <<
"Exception caught at file:" << __FILE__
105 <<
", line:" << __LINE__ << std::endl;
128 template <
typename WorkItemSizesTy = sycl::range<3>,
129 std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
130 std::is_same_v<WorkItemSizesTy,
int *>,
134 return _max_work_item_sizes;
136 return _max_work_item_sizes_i;
138 template <
typename WorkItemSizesTy = sycl::range<3>,
139 std::enable_if_t<std::is_same_v<WorkItemSizesTy, sycl::range<3>> ||
140 std::is_same_v<WorkItemSizesTy,
int *>,
144 return _max_work_item_sizes;
146 return _max_work_item_sizes_i;
157 return _max_work_items_per_compute_unit;
160 return _max_register_size_per_work_group;
162 template <
typename NDRangeSizeTy =
size_t *,
163 std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
164 std::is_same_v<NDRangeSizeTy, int *>,
167 if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
168 return _max_nd_range_size;
170 return _max_nd_range_size_i;
172 template <
typename NDRangeSizeTy =
size_t *,
173 std::enable_if_t<std::is_same_v<NDRangeSizeTy, size_t *> ||
174 std::is_same_v<NDRangeSizeTy, int *>,
177 if constexpr (std::is_same_v<NDRangeSizeTy, size_t *>)
178 return _max_nd_range_size;
180 return _max_nd_range_size_i;
/// Returns a copy of the 16-byte UUID stored in this object.
191 std::array<unsigned char, 16>
get_uuid()
const {
return _uuid; }
194 return _global_mem_cache_size;
204 size_t length = strlen(name);
205 if (
length < device_info::NAME_BUFFER_SIZE) {
206 std::memcpy(_name, name,
length + 1);
208 std::memcpy(_name, name, device_info::NAME_BUFFER_SIZE - 1);
213 _max_work_item_sizes = max_work_item_sizes;
214 for (
int i = 0; i < 3; ++i)
215 _max_work_item_sizes_i[i] = max_work_item_sizes[i];
219 for (
int i = 0; i < 3; ++i) {
220 _max_work_item_sizes[i] = max_work_item_sizes[i];
221 _max_work_item_sizes_i[i] = max_work_item_sizes[i];
225 _host_unified_memory = host_unified_memory;
232 _max_compute_units = max_compute_units;
235 _global_mem_size = global_mem_size;
238 _local_mem_size = local_mem_size;
241 _max_work_group_size = max_work_group_size;
244 _max_sub_group_size = max_sub_group_size;
248 _max_work_items_per_compute_unit = max_work_items_per_compute_unit;
251 for (
int i = 0; i < 3; i++) {
252 _max_nd_range_size[i] = max_nd_range_size[i];
253 _max_nd_range_size_i[i] = max_nd_range_size[i];
257 for (
int i = 0; i < 3; i++) {
258 _max_nd_range_size[i] = max_nd_range_size[i];
259 _max_nd_range_size_i[i] = max_nd_range_size[i];
263 _memory_clock_rate = memory_clock_rate;
266 _memory_bus_width = memory_bus_width;
270 _max_register_size_per_work_group = max_register_size_per_work_group;
/// Stores the given 16-byte UUID in this object.
273 void set_uuid(std::array<unsigned char, 16> uuid) { _uuid = std::move(uuid); }
275 _global_mem_cache_size = global_mem_cache_size;
278 _image1d_max = image_max_buffer_size;
281 size_t image_max_height_buffer_size) {
282 _image2d_max[0] = image_max_width_buffer_size;
283 _image2d_max[1] = image_max_height_buffer_size;
286 size_t image_max_height_buffer_size,
287 size_t image_max_depth_buffer_size) {
288 _image3d_max[0] = image_max_width_buffer_size;
289 _image3d_max[1] = image_max_height_buffer_size;
290 _image3d_max[2] = image_max_depth_buffer_size;
294 constexpr
static size_t NAME_BUFFER_SIZE = 256;
296 char _name[device_info::NAME_BUFFER_SIZE];
298 int _max_work_item_sizes_i[3];
299 bool _host_unified_memory =
false;
305 unsigned int _memory_clock_rate = 3200000;
307 unsigned int _memory_bus_width = 64;
308 unsigned int _global_mem_cache_size;
309 int _max_compute_units;
310 int _max_work_group_size;
311 int _max_sub_group_size;
312 int _max_work_items_per_compute_unit;
313 int _max_register_size_per_work_group;
314 size_t _global_mem_size;
315 size_t _local_mem_size;
316 size_t _max_nd_range_size[3];
317 int _max_nd_range_size_i[3];
319 std::array<unsigned char, 16> _uuid;
343 std::lock_guard<std::mutex> lock(m_mutex);
346 }
catch (std::exception &e) {
351 bool in_order =
true)
352 :
sycl::device(base), _ctx(*this) {
353 if (!this->has(sycl::aspect::usm_device_allocations)) {
354 throw std::invalid_argument(
355 "Device does not support device USM allocations");
358 _default_queue =
create_queue(print_on_async_exceptions, in_order);
359 _saved_queue = _default_queue;
391 return get_info<sycl::info::device::mem_base_addr_align>();
404 #if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
405 if (!has(sycl::aspect::ext_intel_free_memory)) {
406 std::cerr <<
"[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
411 free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
414 std::cerr <<
"[SYCLCompat] get_memory_info: ext_intel_free_memory is not "
418 #if defined(_MSC_VER) && !defined(__clang__)
419 #pragma message("Querying the number of bytes of free memory is not supported")
421 #warning "Querying the number of bytes of free memory is not supported"
429 prop.
set_name(get_info<sycl::info::device::name>().c_str());
432 get_version(major, minor);
437 #
if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION < 20220902)
440 get_info<sycl::info::device::max_work_item_sizes>());
444 get_info<sycl::info::device::max_work_item_sizes<3>>());
449 get_info<sycl::info::device::max_clock_frequency>());
451 get_info<sycl::info::device::max_compute_units>());
453 get_info<sycl::info::device::max_work_group_size>());
457 #if (defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 6)
458 if (has(sycl::aspect::ext_intel_memory_clock_rate)) {
460 get_info<sycl::ext::intel::info::device::memory_clock_rate>();
464 if (has(sycl::aspect::ext_intel_memory_bus_width)) {
466 get_info<sycl::ext::intel::info::device::memory_bus_width>());
468 if (has(sycl::aspect::ext_intel_device_id)) {
469 prop.
set_device_id(get_info<sycl::ext::intel::info::device::device_id>());
471 if (has(sycl::aspect::ext_intel_device_info_uuid)) {
472 prop.
set_uuid(get_info<sycl::ext::intel::info::device::uuid>());
474 #elif defined(_MSC_VER) && !defined(__clang__)
475 #pragma message("get_device_info: querying memory_clock_rate and \
476 memory_bus_width are not supported by the compiler used. \
477 Use 3200000 kHz as memory_clock_rate default value. \
478 Use 64 bits as memory_bus_width default value.")
480 #warning "get_device_info: querying memory_clock_rate and \
481 memory_bus_width are not supported by the compiler used. \
482 Use 3200000 kHz as memory_clock_rate default value. \
483 Use 64 bits as memory_bus_width default value."
486 size_t max_sub_group_size = 1;
487 std::vector<size_t> sub_group_sizes =
488 get_info<sycl::info::device::sub_group_sizes>();
498 get_info<sycl::info::device::max_work_group_size>());
499 #ifdef SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY
504 #if defined(_MSC_VER) && !defined(__clang__)
505 #pragma message("get_device_info: querying the maximum number \
506 of work groups is not supported.")
508 #warning "get_device_info: querying the maximum number of \
509 work groups is not supported."
511 int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
520 get_info<sycl::info::device::global_mem_cache_size>());
// Image size limits reported by the device: the 1D limit, then the 2D
// (width, height) limits, then the 3D (width, height, depth) limits.
522 prop.
set_image1d_max(get_info<sycl::info::device::image_max_buffer_size>());
524 prop.
set_image2d_max(get_info<sycl::info::device::image2d_max_width>(),
525 get_info<sycl::info::device::image2d_max_height>());
// The third extent is the depth, so it must be queried with
// image3d_max_depth rather than image3d_max_height.
526 prop.
set_image3d_max(get_info<sycl::info::device::image3d_max_width>(),
527 get_info<sycl::info::device::image3d_max_height>(),
528 get_info<sycl::info::device::image3d_max_depth>());
538 void reset(
bool print_on_async_exceptions =
false,
bool in_order =
true) {
539 std::lock_guard<std::mutex> lock(m_mutex);
549 _saved_queue = _default_queue =
550 in_order ? create_queue_impl(print_on_async_exceptions,
551 sycl::property::queue::in_order())
552 : create_queue_impl(print_on_async_exceptions);
556 std::lock_guard<std::mutex> lock(m_mutex);
557 _queues.front().get()->wait_and_throw();
558 _queues[0] = std::make_shared<sycl::queue>(q);
559 if (_saved_queue == _default_queue)
560 _saved_queue = _queues.front().get();
561 _default_queue = _queues.front().get();
567 std::unique_lock<std::mutex> lock(m_mutex);
568 std::vector<std::shared_ptr<sycl::queue>> current_queues(_queues);
570 for (
const auto &q : current_queues) {
577 bool in_order =
true) {
578 std::lock_guard<std::mutex> lock(m_mutex);
579 return in_order ? create_queue_impl(print_on_async_exceptions,
580 sycl::property::queue::in_order())
581 : create_queue_impl(print_on_async_exceptions);
584 std::lock_guard<std::mutex> lock(m_mutex);
586 std::remove_if(_queues.begin(), _queues.end(),
587 [=](
const std::shared_ptr<sycl::queue> &q) ->
bool {
588 return q.get() == queue;
594 std::lock_guard<std::mutex> lock(m_mutex);
598 std::lock_guard<std::mutex> lock(m_mutex);
606 const std::initializer_list<sycl::aspect> &props)
const {
607 for (
const auto &it : props) {
611 case sycl::aspect::fp64:
613 "[SYCLcompat] 'double' is not supported in '" +
614 get_info<sycl::info::device::name>() +
617 case sycl::aspect::fp16:
619 "[SYCLcompat] 'half' is not supported in '" +
620 get_info<sycl::info::device::name>() +
624 #define __SYCL_ASPECT(ASPECT, ID) \
625 case sycl::aspect::ASPECT: \
627 #define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID)
628 #define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE)
631 #include <sycl/info/aspects.def>
632 #include <sycl/info/aspects_deprecated.def>
634 return "unknown aspect";
637 #undef __SYCL_ASPECT_DEPRECATED_ALIAS
638 #undef __SYCL_ASPECT_DEPRECATED
642 "' is not supported in '" +
643 get_info<sycl::info::device::name>() +
653 template <
typename... PropertiesT>
654 queue_ptr create_queue_impl(
bool print_on_async_exceptions =
false,
655 PropertiesT... properties) {
657 #ifdef SYCLCOMPAT_PROFILING_ENABLED
658 sycl::property::queue::enable_profiling(),
661 if (print_on_async_exceptions) {
662 _queues.push_back(std::make_shared<sycl::queue>(
665 _queues.push_back(std::make_shared<sycl::queue>(_ctx, *
this, prop));
667 return _queues.back().get();
670 void get_version(
int &major,
int &minor)
const {
674 std::lock_guard<std::mutex> lock(m_mutex);
675 _events.push_back(event);
678 const std::vector<sycl::event> &,
683 std::vector<std::shared_ptr<sycl::queue>> _queues;
684 mutable std::mutex m_mutex;
685 std::vector<sycl::event> _events;
691 #if defined(__linux__)
692 return syscall(SYS_gettid);
693 #elif defined(_WIN64)
694 return GetCurrentThreadId();
696 #error "Only support Windows and Linux."
706 return *_devs[dev_id];
709 std::lock_guard<std::mutex> lock(m_mutex);
710 if (_cpu_device == -1) {
711 throw std::runtime_error(
"[SYCLcompat] No valid cpu device");
713 return *_devs[_cpu_device];
717 std::lock_guard<std::mutex> lock(m_mutex);
722 std::lock_guard<std::mutex> lock(m_mutex);
723 auto it = _thread2dev_map.find(
get_tid());
724 if (it != _thread2dev_map.end())
726 return _default_device_id;
733 std::lock_guard<std::mutex> lock(m_mutex);
735 _thread2dev_map[
get_tid()] = id;
741 throw std::runtime_error(
742 "[SYCLcompat] No SYCL devices found in the device list. Device list "
743 "may have been filtered by syclcompat::filter_device");
746 for (
auto dev_item : _devs) {
747 if (*dev_item == dev) {
752 throw std::runtime_error(
"[SYCLcompat] The device[" +
753 dev.
get_info<sycl::info::device::name>() +
754 "] is filtered out by syclcompat::filter_device "
755 "in current device list!");
760 for (
size_t i = 0; i < _devs.size(); ++i) {
762 << _devs[i]->get_info<sycl::info::device::name>() << std::endl;
770 void filter(
const std::vector<std::string> &dev_subnames) {
771 std::lock_guard<std::mutex> lock(m_mutex);
772 auto iter = _devs.begin();
773 while (iter != _devs.end()) {
774 std::string dev_name = (*iter)->get_info<sycl::info::device::name>();
775 bool matched =
false;
776 for (
const auto &name : dev_subnames) {
777 if (dev_name.find(name) != std::string::npos) {
785 iter = _devs.erase(iter);
788 for (
unsigned i = 0; i < _devs.size(); ++i) {
789 if (_devs[i]->is_cpu()) {
794 _thread2dev_map.clear();
795 #ifdef SYCLCOMPAT_VERBOSE
803 template <
class DeviceSelector>
805 std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
808 unsigned int selected_device_id =
get_device_id(selected_device);
823 mutable std::mutex m_mutex;
827 _devs.push_back(std::make_shared<device_ext>(default_device));
829 std::vector<sycl::device> sycl_all_devs =
830 sycl::device::get_devices(sycl::info::device_type::all);
832 if (default_device.
is_cpu())
834 for (
auto &dev : sycl_all_devs) {
835 if (dev == default_device) {
838 _devs.push_back(std::make_shared<device_ext>(dev));
839 if (_cpu_device == -1 && dev.is_cpu()) {
840 _cpu_device = _devs.size() - 1;
843 #ifdef SYCLCOMPAT_VERBOSE
847 void check_id(
unsigned int id)
const {
848 if (
id >= _devs.size()) {
849 throw std::runtime_error(
"invalid device id");
852 std::vector<std::shared_ptr<device_ext>> _devs;
856 const unsigned int _default_device_id = 0;
858 std::map<unsigned int, unsigned int> _thread2dev_map;
859 int _cpu_device = -1;
865 bool in_order =
true) {
867 print_on_async_exceptions, in_order);
924 static inline void filter_device(
const std::vector<std::string> &dev_subnames) {
938 template <
class DeviceSelector>
939 static inline std::enable_if_t<
940 std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>
The context class represents a SYCL context on which kernel functions may be executed.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
bool is_cpu() const
Get instance of device.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
A list of asynchronous exceptions.
const char * what() const noexcept final
A unique identifier of an item in an index space.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Defines the iteration domain of either a single work-group in a parallel dispatch, or the overall dimensions of the dispatch.
unsigned int get_device_id(const sycl::device &dev)
unsigned int current_device_id() const
void filter(const std::vector< std::string > &dev_subnames)
Filter out devices; only keep the devices whose names contain one of the subnames in dev_subnames.
device_ext & cpu_device() const
dev_mgr(dev_mgr &&)=delete
unsigned int device_count()
dev_mgr & operator=(const dev_mgr &)=delete
static dev_mgr & instance()
Returns the instance of device manager singleton.
void list_devices() const
List all the devices with their ids in dev_mgr.
std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device(const DeviceSelector &selector=sycl::gpu_selector_v)
Select device with a Device Selector.
dev_mgr & operator=(dev_mgr &&)=delete
void select_device(unsigned int id)
Select device with a device ID.
device_ext & current_device()
dev_mgr(const dev_mgr &)=delete
device_ext & get_device(unsigned int id) const
void destroy_queue(queue_ptr &queue)
bool is_native_host_atomic_supported()
int get_max_compute_units() const
queue_ptr create_queue(bool print_on_async_exceptions=false, bool in_order=true)
queue_ptr default_queue()
friend sycl::event enqueue_free(const std::vector< void * > &, const std::vector< sycl::event > &, sycl::queue)
Enqueues the release of all pointers in \p pointers on the queue \p q.
queue_ptr get_saved_queue() const
int get_max_sub_group_size() const
int get_major_version() const
void get_device_info(device_info &out) const
int get_max_clock_frequency() const
Return the maximum clock frequency of this device in kHz.
int get_minor_version() const
sycl::context get_context() const
void get_memory_info(size_t &free_memory, size_t &total_memory) const
Get the number of bytes of free and total memory on the SYCL device.
void set_saved_queue(queue_ptr q)
size_t get_global_mem_size() const
void set_default_queue(const sycl::queue &q)
int get_max_work_group_size() const
void reset(bool print_on_async_exceptions=false, bool in_order=true)
int get_mem_base_addr_align() const
int get_max_register_size_per_work_group() const
void has_capability_or_fail(const std::initializer_list< sycl::aspect > &props) const
Util function to check whether a device supports some kinds of sycl::aspect.
device_info get_device_info() const
void queues_wait_and_throw()
device_ext(const sycl::device &base, bool print_on_async_exceptions=false, bool in_order=true)
int get_integrated() const
void set_image3d_max(size_t image_max_width_buffer_size, size_t image_max_height_buffer_size, size_t image_max_depth_buffer_size)
int get_minor_version() const
auto get_max_work_item_sizes() const
void set_local_mem_size(size_t local_mem_size)
int get_integrated() const
size_t get_local_mem_size() const
unsigned int get_memory_bus_width() const
Returns the maximum bus width between device and memory in bits.
unsigned int get_memory_clock_rate() const
Returns the maximum clock rate of device's global memory in kHz.
int get_max_compute_units() const
void set_max_compute_units(int max_compute_units)
void set_device_id(uint32_t device_id)
auto get_image3d_max() const
void set_max_nd_range_size(int max_nd_range_size[])
void set_memory_clock_rate(unsigned int memory_clock_rate)
void set_image2d_max(size_t image_max_width_buffer_size, size_t image_max_height_buffer_size)
void set_uuid(std::array< unsigned char, 16 > uuid)
void set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit)
void set_major_version(int major)
void set_global_mem_size(size_t global_mem_size)
void set_max_nd_range_size(sycl::id< 3 > max_nd_range_size)
int get_major_version() const
void set_integrated(int integrated)
unsigned int get_global_mem_cache_size() const
Returns global memory cache size in bytes.
auto get_max_nd_range_size() const
void set_max_work_item_sizes(const sycl::id< 3 > max_work_item_sizes)
void set_max_clock_frequency(int frequency)
int get_image1d_max() const
auto get_image2d_max() const
std::array< unsigned char, 16 > get_uuid() const
int get_max_register_size_per_work_group() const
const char * get_name() const
void set_host_unified_memory(bool host_unified_memory)
size_t get_global_mem_size() const
int get_max_sub_group_size() const
int get_max_work_group_size() const
void set_max_work_item_sizes(const sycl::range< 3 > max_work_item_sizes)
void set_max_work_group_size(int max_work_group_size)
void set_max_register_size_per_work_group(int max_register_size_per_work_group)
auto get_max_nd_range_size()
int get_max_clock_frequency() const
uint32_t get_device_id() const
int get_max_work_items_per_compute_unit() const
void set_max_sub_group_size(int max_sub_group_size)
auto get_max_work_item_sizes()
void set_minor_version(int minor)
void set_global_mem_cache_size(unsigned int global_mem_cache_size)
void set_image1d_max(size_t image_max_buffer_size)
bool get_host_unified_memory() const
void set_memory_bus_width(unsigned int memory_bus_width)
void set_name(const char *name)
#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e)
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
__SYCL_EXTERN_STREAM_ATTRS ostream cerr
Linked to standard error (unbuffered)
static std::string getAspectNameStr(sycl::aspect AspectNum)
constexpr sub_group_size_key::value_t< Size > sub_group_size
int default_selector_v(const device &dev)
int gpu_selector_v(const device &dev)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static unsigned int get_tid()
static void parse_version_string(const std::string &ver, int &major, int &minor)
auto exception_handler
SYCL default exception handler.
static void get_version(const sycl::device &dev, int &major, int &minor)
static sycl::queue create_queue(bool print_on_async_exceptions=false, bool in_order=true)
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
static void wait(sycl::queue q=get_default_queue())
static device_ext & get_current_device()
Util function to get the current device.
static int get_minor_version(const sycl::device &dev)
static unsigned int get_current_device_id()
Util function to get the id of current device in device manager.
static unsigned int device_count()
static device_ext & cpu_device()
Util function to get a CPU device.
static void destroy_event(event_ptr event)
Destroy the memory pointed to by the event pointer.
static void set_default_queue(const sycl::queue &q)
Util function to change the default queue of the current device in the device manager. If the device's saved queue is its default queue, the saved queue is replaced by the new queue as well.
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
static int get_major_version(const sycl::device &dev)
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
static sycl::context get_default_context()
Util function to get the context of the default queue of current device in device manager.
static void wait_and_throw(sycl::queue q=get_default_queue())
static void filter_device(const std::vector< std::string > &dev_subnames)
Filter out devices; only keep the devices whose names contain one of the subnames in dev_subnames.
static void list_devices()
List all the devices with their ids in dev_mgr.
static unsigned int get_device_id(const sycl::device &dev)
static unsigned int select_device(unsigned int id)