|
void | syclcompat::detail::matrix_mem_copy (void *to_ptr, const void *from_ptr, int to_ld, int from_ld, int rows, int cols, int elem_size, sycl::queue queue=syclcompat::get_default_queue(), bool async=false) |
|
template<typename T > |
void | syclcompat::detail::matrix_mem_copy (T *to_ptr, const T *from_ptr, int to_ld, int from_ld, int rows, int cols, sycl::queue queue=get_default_queue(), bool async=false) |
| Copy matrix data. More...
|
|
int | syclcompat::cast_double_to_int (double d, bool use_high32=true) |
| Cast the high or low 32 bits of a double to an integer. More...
|
|
double | syclcompat::cast_ints_to_double (int high32, int low32) |
| Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double. More...
|
|
template<typename T > |
T | syclcompat::reverse_bits (T a) |
| Reverse the bit order of an unsigned integer. More...
|
|
unsigned int | syclcompat::byte_level_permute (unsigned int a, unsigned int b, unsigned int s) |
|
template<typename T > |
int | syclcompat::ffs (T a) |
| Find the position of the least significant set bit in an integer. More...
|
|
template<typename T > |
T | syclcompat::select_from_sub_group (sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32) |
| select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group. More...
|
|
template<typename T > |
T | syclcompat::shift_sub_group_left (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32) |
| shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left. More...
|
|
template<typename T > |
T | syclcompat::shift_sub_group_right (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32) |
| shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right. More...
|
|
template<typename T > |
T | syclcompat::permute_sub_group_by_xor (sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32) |
| permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask. More...
|
|
template<typename T > |
T | syclcompat::experimental::select_from_sub_group (unsigned int member_mask, sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32) |
| Masked version of select_from_sub_group, which executes a masked sub-group operation. More...
|
|
template<typename T > |
T | syclcompat::experimental::shift_sub_group_left (unsigned int member_mask, sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32) |
| Masked version of shift_sub_group_left, which executes a masked sub-group operation. More...
|
|
template<typename T > |
T | syclcompat::experimental::shift_sub_group_right (unsigned int member_mask, sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32) |
| Masked version of shift_sub_group_right, which executes a masked sub-group operation. More...
|
|
template<typename T > |
T | syclcompat::experimental::permute_sub_group_by_xor (unsigned int member_mask, sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32) |
| Masked version of permute_sub_group_by_xor, which executes a masked sub-group operation. More...
|
|
int | syclcompat::get_sycl_language_version () |
| Inherited from the original SYCLomatic compatibility headers. More...
|
|
template<typename T > |
unsigned int | syclcompat::match_any_over_sub_group (sycl::sub_group g, unsigned member_mask, T value) |
| The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
|
|
template<typename T > |
unsigned int | syclcompat::match_all_over_sub_group (sycl::sub_group g, unsigned member_mask, T value, int *pred) |
| The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
|
|
template<int dimensions = 3> |
void | syclcompat::experimental::nd_range_barrier (const sycl::nd_item< dimensions > &item, sycl::atomic_ref< unsigned int, barrier_memory_order, sycl::memory_scope::device, sycl::access::address_space::global_space > &counter) |
| Synchronize work items from all work groups within a SYCL kernel. More...
|
|
template<> |
void | syclcompat::experimental::nd_range_barrier (const sycl::nd_item< 1 > &item, sycl::atomic_ref< unsigned int, barrier_memory_order, sycl::memory_scope::device, sycl::access::address_space::global_space > &counter) |
| Synchronize work items from all work groups within a SYCL kernel. More...
|
|
int | syclcompat::experimental::calculate_max_active_wg_per_xecore (int *num_wg, int wg_size, int slm_size=0, int sg_size=32, bool used_barrier=false, bool used_large_grf=false) |
| This function is used for occupancy calculation; it computes the maximum number of active work-groups per Xe-Core. More...
|
|
int | syclcompat::experimental::calculate_max_potential_wg (int *num_wg, int *wg_size, int max_wg_size_for_device_code, int slm_size=0, int sg_size=32, bool used_barrier=false, bool used_large_grf=false) |
| This function is used for occupancy calculation; it computes the number of work-groups and the work-group size that potentially achieve the maximum occupancy of the device. More...
|
|
queue_ptr | syclcompat::int_as_queue_ptr (uintptr_t x) |
| If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr. More...
|
|