DPC++ Runtime
Runtime libraries for oneAPI DPC++
syclcompat Namespace Reference

Namespaces

 detail
 Atomic extension to implement standard APIs in std::atomic.
 
 experimental
 
 global_id
 
 global_range
 
 local_id
 
 local_range
 
 work_group_id
 
 work_group_range
 

Classes

class  atomic
 
class  device_info
 
class  device_ext
 device extension More...
 
class  dim3
 
struct  kernel_function_info
 
class  kernel_library
 
class  kernel_function
 
struct  abs
 A sycl::abs wrapper functor. More...
 
struct  abs_diff
 A sycl::abs_diff wrapper functor. More...
 
struct  add_sat
 A sycl::add_sat wrapper functor. More...
 
struct  rhadd
 A sycl::rhadd wrapper functor. More...
 
struct  hadd
 A sycl::hadd wrapper functor. More...
 
struct  maximum
 A sycl::max wrapper functor. More...
 
struct  minimum
 A sycl::min wrapper functor. More...
 
struct  sub_sat
 A sycl::sub_sat wrapper functor. More...
 
class  pitched_data
 Pitched 2D/3D memory data. More...
 
class  accessor
 accessor used as device function parameter. More...
 
class  accessor< T, Memory, 3 >
 
class  accessor< T, Memory, 2 >
 
class  device_memory
 Device variable with address space of shared or global. More...
 
class  device_memory< T, Memory, 0 >
 
class  pointer_attributes
 
struct  type_identity
 
struct  arith
 
class  args_selector
 
class  args_selector< n_nondefault_params, n_default_params, R(Ts...)>
 args_selector is a helper class for extracting arguments from an array of pointers to arguments or buffer of arguments to pass to a kernel function. More...
 

Typedefs

using event_ptr = sycl::event *
 
using queue_ptr = sycl::queue *
 
using device_ptr = char *
 
typedef void(* kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)
 
using byte_t = uint8_t
 
template<class T , size_t Dimension>
using global_memory = device_memory< T, memory_region::global, Dimension >
 
template<class T , size_t Dimension>
using constant_memory = device_memory< T, memory_region::constant, Dimension >
 
template<class T , size_t Dimension>
using shared_memory = device_memory< T, memory_region::usm_shared, Dimension >
 
template<class T >
using type_identity_t = typename type_identity< T >::type
 
template<typename T >
using arith_t = typename arith< T >::type
 
using err0 = detail::generic_error_type< struct err0_tag, int >
 
using err1 = detail::generic_error_type< struct err1_tag, int >
 

Enumerations

enum  error_code { SUCCESS = 0 , BACKEND_ERROR = 1 , DEFAULT_ERROR = 999 }
 
enum class  memory_region { global = 0 , constant , local , usm_shared }
 
enum class  target { device , local }
 

Functions

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_add (T *addr, arith_t< T > operand)
 Atomically add the value operand to the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_sub (T *addr, arith_t< T > operand)
 Atomically subtract the value operand from the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_and (T *addr, type_identity_t< T > operand)
 Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_or (T *addr, type_identity_t< T > operand)
 Atomically or the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_xor (T *addr, type_identity_t< T > operand)
 Atomically xor the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_min (T *addr, type_identity_t< T > operand)
 Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_max (T *addr, type_identity_t< T > operand)
 Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_dec (unsigned int *addr, unsigned int operand)
 Atomically set operand to the value stored in addr, if old value stored in addr is equal to zero or greater than operand, else decrease the value stored in addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_inc (unsigned int *addr, unsigned int operand)
 Atomically increment the value stored in addr if old value stored in addr is less than operand, else set 0 to the value stored in addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_exchange (T *addr, type_identity_t< T > operand)
 Atomically exchange the value at the address addr with the value operand. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (sycl::multi_ptr< T, sycl::access::address_space::generic_space > addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (T *addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
static void destroy_event (event_ptr event)
 Destroy event pointed memory. More...
 
static sycl::queue create_queue (bool print_on_async_exceptions=false, bool in_order=true)
 
static sycl::queue get_default_queue ()
 Util function to get the default queue of current device in device manager. More...
 
static void set_default_queue (const sycl::queue &q)
 Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well. More...
 
static void wait (sycl::queue q=get_default_queue())
 
static void wait_and_throw (sycl::queue q=get_default_queue())
 
static unsigned int get_current_device_id ()
 Util function to get the id of current device in device manager. More...
 
static device_ext & get_current_device ()
 Util function to get the current device. More...
 
static device_ext & get_device (unsigned int id)
 Util function to get a device by id. More...
 
static sycl::context get_default_context ()
 Util function to get the context of the default queue of current device in device manager. More...
 
static device_ext & cpu_device ()
 Util function to get a CPU device. More...
 
static unsigned int select_device (unsigned int id)
 
template<class DeviceSelector >
static std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device (const DeviceSelector &selector=sycl::gpu_selector_v)
 
static unsigned int get_device_id (const sycl::device &dev)
 
dim3 operator* (const dim3 &a, const dim3 &b)
 
dim3 operator+ (const dim3 &a, const dim3 &b)
 
dim3 operator- (const dim3 &a, const dim3 &b)
 
void wg_barrier ()
 
static void get_kernel_function_info (kernel_function_info *kernel_info, const void *function)
 
static kernel_function_info get_kernel_function_info (const void *function)
 
static kernel_library load_kernel_library (const std::string &name)
 Load kernel library and return a handle to use the library. More...
 
static kernel_library load_kernel_library_mem (char const *const image)
 Load kernel library whose image is already in memory and return a handle to use the library. More...
 
static void unload_kernel_library (const kernel_library &library)
 Unload kernel library. More...
 
static kernel_function get_kernel_function (kernel_library &library, const std::string &name)
 Find kernel function in a kernel library and return its address. More...
 
static void invoke_kernel_function (kernel_function &function, sycl::queue &queue, sycl::range< 3 > group_range, sycl::range< 3 > local_range, unsigned int local_mem_size, void **kernel_params, void **extra)
 Invoke a kernel function. More...
 
template<int Dim>
sycl::nd_range< Dim > compute_nd_range (sycl::range< Dim > global_size_in, sycl::range< Dim > work_group_size)
 
sycl::nd_range< 1 > compute_nd_range (int global_size_in, int work_group_size)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, sycl::queue q, Args... args)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, Args... args)
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue. More...
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue. More...
 
float fast_length (const float *a, int len)
 Compute fast_length for variable-length array. More...
 
template<typename ValueT >
ValueT length (const ValueT *a, const int len)
 Calculate the square root of the input array. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool > compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs comparison. More...
 
template<typename ValueT >
std::enable_if_t< std::is_same_v< std::invoke_result_t< std::not_equal_to<>, ValueT, ValueT >, bool >, bool > compare (const ValueT a, const ValueT b, const std::not_equal_to<> binary_op)
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT > compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool > unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs unordered comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT > unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element unordered comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool > compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element comparison and return true if both results are true. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool > unordered_compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element unordered comparison and return true if both results are true. More...
 
template<typename ValueT , class BinaryOperation >
unsigned compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
 Performs 2 elements comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...
 
template<typename ValueT , class BinaryOperation >
unsigned unordered_compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
 Performs 2 elements unordered comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...
 
template<typename S , typename T >
T vectorized_max (T a, T b)
 Compute vectorized max for two values, with each value treated as a vector type S. More...
 
template<typename S , typename T >
T vectorized_min (T a, T b)
 Compute vectorized min for two values, with each value treated as a vector type S. More...
 
template<typename VecT , class UnaryOperation >
unsigned vectorized_unary (unsigned a, const UnaryOperation unary_op)
 Compute vectorized unary operation for a value, with the value treated as a vector type VecT. More...
 
template<typename VecT >
unsigned vectorized_sum_abs_diff (unsigned a, unsigned b)
 Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type VecT. More...
 
template<typename S , typename T >
T vectorized_isgreater (T a, T b)
 Compute vectorized isgreater for two values, with each value treated as a vector type S. More...
 
template<>
unsigned vectorized_isgreater< sycl::ushort2, unsigned > (unsigned a, unsigned b)
 Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short. More...
 
template<typename ValueT >
ValueT clamp (ValueT val, ValueT min_val, ValueT max_val)
 Returns min(max(val, min_val), max_val) More...
 
template<typename ValueT >
std::enable_if_t< ValueT::size()==2, ValueT > isnan (const ValueT a)
 Determine whether 2 element value is NaN. More...
 
template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT > cbrt (ValueT val)
 cbrt function wrapper. More...
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > > min (ValueT a, ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > > min (ValueT a, ValueU b)
 
sycl::half min (sycl::half a, sycl::half b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > > max (ValueT a, ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > > max (ValueT a, ValueU b)
 
sycl::half max (sycl::half a, sycl::half b)
 
template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU > fmax_nan (const ValueT a, const ValueU b)
 Performs 2 elements comparison and returns the bigger one. More...
 
template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 > fmax_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)
 
template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU > fmin_nan (const ValueT a, const ValueU b)
 Performs 2 elements comparison and returns the smaller one. More...
 
template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 > fmin_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)
 
float pow (const float a, const int b)
 
double pow (const double a, const int b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT >, ValueT > pow (const ValueT a, const ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t<!std::is_floating_point_v< ValueT >, double > pow (const ValueT a, const ValueU b)
 
template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT > relu (const ValueT a)
 Performs relu saturation. More...
 
template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::vec< ValueT, 2 > > relu (const sycl::vec< ValueT, 2 > a)
 
template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::marray< ValueT, 2 > > relu (const sycl::marray< ValueT, 2 > a)
 
template<typename T >
sycl::vec< T, 2 > cmul (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the multiplication of two complex numbers. More...
 
template<typename T >
sycl::vec< T, 2 > cdiv (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the division of two complex numbers. More...
 
template<typename T >
T cabs (sycl::vec< T, 2 > x)
 Computes the magnitude of a complex number. More...
 
template<typename T >
sycl::vec< T, 2 > conj (sycl::vec< T, 2 > x)
 Computes the complex conjugate of a complex number. More...
 
template<typename ValueT >
sycl::vec< ValueT, 2 > cmul_add (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const sycl::vec< ValueT, 2 > c)
 Performs complex number multiply addition. More...
 
template<typename ValueT >
sycl::marray< ValueT, 2 > cmul_add (const sycl::marray< ValueT, 2 > a, const sycl::marray< ValueT, 2 > b, const sycl::marray< ValueT, 2 > c)
 
template<typename VecT , class BinaryOperation >
unsigned vectorized_binary (unsigned a, unsigned b, const BinaryOperation binary_op)
 Compute vectorized binary operation value for two values, with each value treated as a vector type VecT. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_add (AT a, BT b)
 Extend a and b to 33 bit and add them. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_add (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, add a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_add_sat (AT a, BT b)
 Extend a and b to 33 bit and add them with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_add_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, add a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_sub (AT a, BT b)
 Extend a and b to 33 bit and minus them. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_sub (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, minus a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_sub_sat (AT a, BT b)
 Extend a and b to 33 bit and minus them with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_sub_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, minus a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_absdiff (AT a, BT b)
 Extend a and b to 33 bit and do abs_diff. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_absdiff (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, abs_diff a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_absdiff_sat (AT a, BT b)
 Extend a and b to 33 bit and do abs_diff with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_absdiff_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, abs_diff a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_min (AT a, BT b)
 Extend a and b to 33 bit and return smaller one. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_min (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the smaller one in a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_min_sat (AT a, BT b)
 Extend a and b to 33 bit and return smaller one with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_min_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the smaller one in a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_max (AT a, BT b)
 Extend a and b to 33 bit and return bigger one. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_max (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the bigger one in a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_max_sat (AT a, BT b)
 Extend a and b to 33 bit and return bigger one with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_max_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the bigger one in a, b with saturation, then do second_op with c. More...
 
template<typename AllocT >
auto * local_mem ()
 
static void * malloc (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the device. More...
 
static void * malloc_host (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the host. More...
 
static void * malloc_shared (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block of usm_shared memory. More...
 
static pitched_data malloc (sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Allocate memory block for 3D array on the device. More...
 
static void * malloc (size_t &pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Allocate memory block for 2D array on the device. More...
 
static void free (void *ptr, sycl::queue q=get_default_queue())
 free More...
 
sycl::event free_async (const std::vector< void * > &pointers, const std::vector< sycl::event > &events, sycl::queue q=get_default_queue())
 Free the device memory pointed by a batch of pointers in pointers which are related to q after events completed. More...
 
static void memcpy (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static sycl::event memcpy_async (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static void memcpy (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
static void memcpy (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static void memcpy (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Synchronously copies a subset of the 3D matrix specified by from to the 3D matrix specified by to. More...
 
static sycl::event memcpy_async (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Asynchronously copies a subset of the 3D matrix specified by from to the 3D matrix specified by to. More...
 
template<class T >
static void fill (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Synchronously sets pattern to the first count elements starting from dev_ptr. More...
 
template<class T >
static sycl::event fill_async (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Asynchronously sets pattern to the first count elements starting from dev_ptr. More...
 
static void memset (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Synchronously sets value to the first size bytes starting from dev_ptr. More...
 
static void memset_d16 (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
 Sets 2 bytes data value to the first size elements starting from dev_ptr in q synchronously. More...
 
static void memset_d32 (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
 Sets 4 bytes data value to the first size elements starting from dev_ptr in q synchronously. More...
 
static sycl::event memset_async (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Sets 1 byte data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static sycl::event memset_d16_async (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
 Sets 2 bytes data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static sycl::event memset_d32_async (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
 Sets 4 bytes data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static void memset (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static void memset_d16 (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static void memset_d32 (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static sycl::event memset_async (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static sycl::event memset_d16_async (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static sycl::event memset_d32_async (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static void memset (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets value to the 3D memory region specified by pitch in q. More...
 
static sycl::event memset_async (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets val to the 3D memory region specified by pitch in q asynchronously. More...
 
int cast_double_to_int (double d, bool use_high32=true)
 Cast the high or low 32 bits of a double to an integer. More...
 
double cast_ints_to_double (int high32, int low32)
 Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double. More...
 
template<typename T >
T reverse_bits (T a)
 Reverse the bit order of an unsigned integer. More...
 
unsigned int byte_level_permute (unsigned int a, unsigned int b, unsigned int s)
 
template<typename T >
int ffs (T a)
 Find the position of the least significant set bit in an integer. More...
 
template<typename T >
T select_from_sub_group (sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32)
 select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group. More...
 
template<typename T >
T shift_sub_group_left (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left. More...
 
template<typename T >
T shift_sub_group_right (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right. More...
 
template<typename T >
T permute_sub_group_by_xor (sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32)
 permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask. More...
 
int get_sycl_language_version ()
 Inherited from the original SYCLomatic compatibility headers. More...
 
template<typename T >
unsigned int match_any_over_sub_group (sycl::sub_group g, unsigned member_mask, T value)
 The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
 
template<typename T >
unsigned int match_all_over_sub_group (sycl::sub_group g, unsigned member_mask, T value, int *pred)
 The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
 
queue_ptr int_as_queue_ptr (uintptr_t x)
 If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr. More...
 

Typedef Documentation

◆ arith_t

template<typename T >
using syclcompat::arith_t = typedef typename arith<T>::type

Definition at line 42 of file traits.hpp.

◆ byte_t

using syclcompat::byte_t = typedef uint8_t

Definition at line 97 of file memory.hpp.

◆ constant_memory

template<class T , size_t Dimension>
using syclcompat::constant_memory = typedef device_memory<T, memory_region::constant, Dimension>

Definition at line 1159 of file memory.hpp.

◆ device_ptr

using syclcompat::device_ptr = typedef char *

Definition at line 84 of file device.hpp.

◆ err0

using syclcompat::err0 = typedef detail::generic_error_type<struct err0_tag, int>

Definition at line 132 of file util.hpp.

◆ err1

using syclcompat::err1 = typedef detail::generic_error_type<struct err1_tag, int>

Definition at line 133 of file util.hpp.

◆ event_ptr

using syclcompat::event_ptr = typedef sycl::event *

Definition at line 80 of file device.hpp.

◆ global_memory

template<class T , size_t Dimension>
using syclcompat::global_memory = typedef device_memory<T, memory_region::global, Dimension>

Definition at line 1157 of file memory.hpp.

◆ kernel_functor

typedef void(* syclcompat::kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)

Definition at line 59 of file kernel.hpp.

◆ queue_ptr

using syclcompat::queue_ptr = typedef sycl::queue *

Definition at line 82 of file device.hpp.

◆ shared_memory

template<class T , size_t Dimension>
using syclcompat::shared_memory = typedef device_memory<T, memory_region::usm_shared, Dimension>

Definition at line 1161 of file memory.hpp.

◆ type_identity_t

template<class T >
using syclcompat::type_identity_t = typedef typename type_identity<T>::type

Definition at line 35 of file traits.hpp.

Enumeration Type Documentation

◆ error_code

Enumerator
SUCCESS 
BACKEND_ERROR 
DEFAULT_ERROR 

Definition at line 59 of file defs.hpp.

◆ memory_region

Enumerator
global 
constant 
local 
usm_shared 

Definition at line 88 of file memory.hpp.

◆ target

enum syclcompat::target
strong
Enumerator
device 
local 

Definition at line 95 of file memory.hpp.

Function Documentation

◆ atomic_compare_exchange_strong() [1/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( sycl::multi_ptr< T, sycl::access::address_space::generic_space >  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in,out]addrMulti_ptr.
expectedThe value to compare against the value at addr.
desiredThe value to assign to addr if the value at addr equals expected.
successThe memory ordering used when comparison succeeds.
failThe memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 253 of file atomic.hpp.

◆ atomic_compare_exchange_strong() [2/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( T *  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in]addrThe pointer to the data.
expectedThe value to compare against the value at addr.
desiredThe value to assign to addr if the value at addr equals expected.
successThe memory ordering used when comparison succeeds.
failThe memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 279 of file atomic.hpp.

◆ atomic_exchange()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_exchange ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically exchange the value at the address addr with the value operand.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to be exchanged with the value pointed by addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 232 of file atomic.hpp.

◆ atomic_fetch_add()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_add ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically add the value operand to the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to add to the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 56 of file atomic.hpp.

◆ atomic_fetch_and()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_and ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise AND operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 91 of file atomic.hpp.

◆ atomic_fetch_compare_dec()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_dec ( unsigned int *  addr,
unsigned int  operand 
)

Atomically store operand to addr if the old value stored in addr is equal to zero or greater than operand; otherwise decrement the value stored in addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe threshold value.
memoryOrderThe memory ordering used.
Returns
The old value stored in addr.

Definition at line 176 of file atomic.hpp.

◆ atomic_fetch_compare_inc()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_inc ( unsigned int *  addr,
unsigned int  operand 
)
inline

Atomically increment the value stored in addr if the old value stored in addr is less than operand; otherwise set the value stored in addr to 0.

Parameters
[in,out]addrThe pointer to the data.
operandThe threshold value.
memoryOrderThe memory ordering used.
Returns
The old value stored in addr.

Definition at line 205 of file atomic.hpp.

◆ atomic_fetch_max()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_max ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to compare with the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 160 of file atomic.hpp.

◆ atomic_fetch_min()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_min ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
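operandThe value to compare with the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.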

Definition at line 143 of file atomic.hpp.

◆ atomic_fetch_or()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_or ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise OR between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise OR operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 109 of file atomic.hpp.

◆ atomic_fetch_sub()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_sub ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically subtract the value operand from the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to subtract from the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 73 of file atomic.hpp.

◆ atomic_fetch_xor()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_xor ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise XOR between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise XOR operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 127 of file atomic.hpp.

◆ byte_level_permute()

unsigned int syclcompat::byte_level_permute ( unsigned int  a,
unsigned int  b,
unsigned int  s 
)
inline
Parameters
[in]aThe first value contains 4 bytes
[in]bThe second value contains 4 bytes
[in]sThe selector value; only the lower 16 bits are used
Returns
the permutation result of 4 bytes selected in the way specified by s from a and b

Definition at line 181 of file util.hpp.

◆ cabs()

template<typename T >
T syclcompat::cabs ( sycl::vec< T, 2 >  x)

Computes the magnitude of a complex number.

Template Parameters
TComplex element type
Parameters
[in]xThe input complex number
Returns
The result

Definition at line 572 of file math.hpp.

References sycl::_V1::ext::intel::esimd::abs().

◆ cast_double_to_int()

int syclcompat::cast_double_to_int ( double  d,
bool  use_high32 = true 
)
inline

Cast the high or low 32 bits of a double to an integer.

Parameters
[in]dThe double value.
[in]use_high32Cast the high 32 bits of the double if true; otherwise cast the low 32 bits.

Definition at line 139 of file util.hpp.

◆ cast_ints_to_double()

double syclcompat::cast_ints_to_double ( int  high32,
int  low32 
)
inline

Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double.

Parameters
[in]high32The integer as the high 32 bits
[in]low32The integer as the low 32 bits

Definition at line 151 of file util.hpp.

◆ cbrt()

template<typename ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::cbrt ( ValueT  val)
inline

cbrt function wrapper.

Definition at line 417 of file math.hpp.

◆ cdiv()

template<typename T >
sycl::vec<T, 2> syclcompat::cdiv ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the division of two complex numbers.

Template Parameters
TComplex element type
Parameters
[in]xThe first input complex number
[in]yThe second input complex number
Returns
The result

Definition at line 562 of file math.hpp.

◆ clamp()

template<typename ValueT >
ValueT syclcompat::clamp ( ValueT  val,
ValueT  min_val,
ValueT  max_val 
)
inline

Returns min(max(val, min_val), max_val)

Parameters
[in]valThe input value
[in]min_valThe minimum value
[in]max_valThe maximum value
Returns
the value between min_val and max_val

Definition at line 400 of file math.hpp.

References syclcompat::detail::clamp().

◆ cmul()

template<typename T >
sycl::vec<T, 2> syclcompat::cmul ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the multiplication of two complex numbers.

Template Parameters
TComplex element type
Parameters
[in]xThe first input complex number
[in]yThe second input complex number
Returns
The result

Definition at line 550 of file math.hpp.

◆ cmul_add() [1/2]

template<typename ValueT >
sycl::marray<ValueT, 2> syclcompat::cmul_add ( const sycl::marray< ValueT, 2 >  a,
const sycl::marray< ValueT, 2 >  b,
const sycl::marray< ValueT, 2 >  c 
)
inline

Definition at line 603 of file math.hpp.

◆ cmul_add() [2/2]

template<typename ValueT >
sycl::vec<ValueT, 2> syclcompat::cmul_add ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const sycl::vec< ValueT, 2 >  c 
)
inline

Performs complex number multiply addition.

Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
the operation result

Definition at line 593 of file math.hpp.

◆ compare() [1/3]

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 188 of file math.hpp.

Referenced by compare(), compare_both(), and compare_mask().

◆ compare() [2/3]

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 207 of file math.hpp.

References compare().

◆ compare() [3/3]

template<typename ValueT >
std::enable_if_t< std::is_same_v<std::invoke_result_t<std::not_equal_to<>, ValueT, ValueT>, bool>, bool> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const std::not_equal_to<>  binary_op 
)
inline

Definition at line 196 of file math.hpp.

References syclcompat::detail::isnan().

◆ compare_both()

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, bool> syclcompat::compare_both ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs a 2-element comparison and returns true if both results are true.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 245 of file math.hpp.

References compare().

◆ compare_mask()

template<typename ValueT , class BinaryOperation >
unsigned syclcompat::compare_mask ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const BinaryOperation  binary_op 
)
inline

Performs a 2-element comparison. The result for each element is 0 (false) or 0xffff (true); the two per-element results are composed into a single unsigned int, which is returned.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 271 of file math.hpp.

References compare().

◆ compute_nd_range() [1/2]

sycl::nd_range<1> syclcompat::compute_nd_range ( int  global_size_in,
int  work_group_size 
)
inline

Definition at line 110 of file launch.hpp.

References sycl::_V1::ext::oneapi::experimental::work_group_size.

◆ compute_nd_range() [2/2]

template<int Dim>
sycl::nd_range<Dim> syclcompat::compute_nd_range ( sycl::range< Dim >  global_size_in,
sycl::range< Dim >  work_group_size 
)
inline

◆ conj()

template<typename T >
sycl::vec<T, 2> syclcompat::conj ( sycl::vec< T, 2 >  x)

Computes the complex conjugate of a complex number.

Template Parameters
TComplex element type
Parameters
[in]xThe input complex number
Returns
The result

Definition at line 581 of file math.hpp.

◆ cpu_device()

static device_ext& syclcompat::cpu_device ( )
inlinestatic

Util function to get a CPU device.

Definition at line 788 of file device.hpp.

References syclcompat::detail::dev_mgr::cpu_device(), and syclcompat::detail::dev_mgr::instance().

◆ create_queue()

static sycl::queue syclcompat::create_queue ( bool  print_on_async_exceptions = false,
bool  in_order = true 
)
inlinestatic

◆ destroy_event()

static void syclcompat::destroy_event ( event_ptr  event)
static

Destroy the memory pointed to by event.

Parameters
eventPointer to the sycl::event address.

Definition at line 89 of file device.hpp.

◆ extend_absdiff() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_absdiff ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and do abs_diff.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend abs_diff of the two values

Definition at line 825 of file math.hpp.

◆ extend_absdiff() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_absdiff ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend abs_diff of a, b and second_op with c

Definition at line 843 of file math.hpp.

◆ extend_absdiff_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_absdiff_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and do abs_diff with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend abs_diff of the two values with saturation

Definition at line 856 of file math.hpp.

◆ extend_absdiff_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_absdiff_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend abs_diff of a, b with saturation and second_op with c

Definition at line 875 of file math.hpp.

◆ extend_add() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_add ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and add them.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend addition of the two values

Definition at line 703 of file math.hpp.

◆ extend_add() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_add ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, add a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend addition of a, b and second_op with c

Definition at line 720 of file math.hpp.

◆ extend_add_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_add_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and add them with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend addition of the two values with saturation

Definition at line 732 of file math.hpp.

◆ extend_add_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_add_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, add a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend addition of a, b with saturation and second_op with c

Definition at line 751 of file math.hpp.

◆ extend_max() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_max ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return bigger one.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The bigger one of the two extended values

Definition at line 950 of file math.hpp.

◆ extend_max() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_max ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The bigger one of a, b and second_op with c

Definition at line 968 of file math.hpp.

◆ extend_max_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_max_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return bigger one with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The bigger one of the two extended values with saturation

Definition at line 980 of file math.hpp.

◆ extend_max_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_max_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The bigger one of a, b with saturation and second_op with c

Definition at line 999 of file math.hpp.

◆ extend_min() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_min ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return smaller one.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The smaller one of the two extended values

Definition at line 888 of file math.hpp.

◆ extend_min() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_min ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The smaller one of a, b and second_op with c

Definition at line 906 of file math.hpp.

◆ extend_min_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_min_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return smaller one with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The smaller one of the two extended values with saturation

Definition at line 918 of file math.hpp.

◆ extend_min_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_min_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The smaller one of a, b with saturation and second_op with c

Definition at line 937 of file math.hpp.

◆ extend_sub() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_sub ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bits and subtract them.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend subtraction of the two values

Definition at line 764 of file math.hpp.

◆ extend_sub() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_sub ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend inputs to 33 bits, subtract a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend subtraction of a, b and second_op with c

Definition at line 781 of file math.hpp.

◆ extend_sub_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_sub_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bits and subtract them with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend subtraction of the two values with saturation

Definition at line 793 of file math.hpp.

◆ extend_sub_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_sub_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend inputs to 33 bits, subtract a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend subtraction of a, b with saturation and second_op with c

Definition at line 812 of file math.hpp.

◆ fast_length()

float syclcompat::fast_length ( const float *  a,
int  len 
)
inline

Compute fast_length for variable-length array.

Parameters
[in]aThe array
[in]lenLength of the array
Returns
The computed fast_length

Definition at line 136 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

◆ ffs()

template<typename T >
int syclcompat::ffs ( T  a)
inline

Find position of first least significant set bit in an integer.

ffs(0) returns 0.

Parameters
[in]aInput integer value
Returns
The position

Definition at line 197 of file util.hpp.

◆ fill()

template<class T >
static void syclcompat::fill ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously sets pattern to the first count elements starting from dev_ptr.

The function will return after the fill operation is completed.

Template Parameters
TDatatype of the value to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 721 of file memory.hpp.

Referenced by fill_async().

◆ fill_async()

template<class T >
static sycl::event syclcompat::fill_async ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously sets pattern to the first count elements starting from dev_ptr.

The return of the function does NOT guarantee the fill operation is completed.

Template Parameters
TDatatype of the pattern to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
An event representing the fill operation.

Definition at line 738 of file memory.hpp.

References fill().

◆ fmax_nan() [1/2]

template<typename ValueT , typename ValueU >
sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmax_nan ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueU, 2 >  b 
)
inline

Definition at line 475 of file math.hpp.

References fmax_nan().

◆ fmax_nan() [2/2]

template<typename ValueT , typename ValueU >
std::common_type_t<ValueT, ValueU> syclcompat::fmax_nan ( const ValueT  a,
const ValueU  b 
)
inline

Performs 2 elements comparison and returns the bigger one.

If either of inputs is NaN, then return NaN.

Parameters
[in]aThe first value
[in]bThe second value
Returns
the bigger value

Definition at line 466 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmax_nan().

◆ fmin_nan() [1/2]

template<typename ValueT , typename ValueU >
sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmin_nan ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueU, 2 >  b 
)
inline

Definition at line 494 of file math.hpp.

References fmin_nan().

◆ fmin_nan() [2/2]

template<typename ValueT , typename ValueU >
std::common_type_t<ValueT, ValueU> syclcompat::fmin_nan ( const ValueT  a,
const ValueU  b 
)
inline

Performs 2 elements comparison and returns the smaller one.

If either of inputs is NaN, then return NaN.

Parameters
[in]aThe first value
[in]bThe second value
Returns
the smaller value

Definition at line 485 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmin_nan().

◆ free()

static void syclcompat::free ( void *  ptr,
sycl::queue  q = get_default_queue() 
)
inlinestatic

free

Parameters
ptrPointer to free.
qQueue to execute the free task.
Returns
no return value.

Definition at line 536 of file memory.hpp.

References sycl::_V1::queue::get_context().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::~device_memory().

◆ free_async()

sycl::event syclcompat::free_async ( const std::vector< void * > &  pointers,
const std::vector< sycl::event > &  events,
sycl::queue  q = get_default_queue() 
)
inline

Free the device memory pointed by a batch of pointers in pointers which are related to q after events completed.

Parameters
pointersThe pointers point to the device memory requested to be freed.
eventsThe events to be waited.
qThe sycl::queue the memory relates to.

Definition at line 549 of file memory.hpp.

◆ get_current_device()

static device_ext& syclcompat::get_current_device ( )
inlinestatic

◆ get_current_device_id()

static unsigned int syclcompat::get_current_device_id ( )
inlinestatic

Util function to get the id of current device in device manager.

Definition at line 767 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::instance().

◆ get_default_context()

static sycl::context syclcompat::get_default_context ( )
inlinestatic

Util function to get the context of the default queue of current device in device manager.

Definition at line 783 of file device.hpp.

References syclcompat::device_ext::get_context(), and get_current_device().

◆ get_default_queue()

static sycl::queue syclcompat::get_default_queue ( )
inlinestatic

Util function to get the default queue of current device in device manager.

Definition at line 744 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::device_ext::default_queue(), and syclcompat::detail::dev_mgr::instance().

Referenced by launch().

◆ get_device()

◆ get_device_id()

static unsigned int syclcompat::get_device_id ( const sycl::device dev)
inlinestatic

◆ get_kernel_function()

static kernel_function syclcompat::get_kernel_function ( kernel_library library,
const std::string &  name 
)
inlinestatic

Find kernel function in a kernel library and return its address.

Parameters
[in]libraryHandle to the kernel library.
[in]nameName of the kernel function.

Definition at line 435 of file kernel.hpp.

◆ get_kernel_function_info() [1/2]

static kernel_function_info syclcompat::get_kernel_function_info ( const void *  function)
inlinestatic

◆ get_kernel_function_info() [2/2]

static void syclcompat::get_kernel_function_info ( kernel_function_info kernel_info,
const void *  function 
)
inlinestatic

◆ get_sycl_language_version()

int syclcompat::get_sycl_language_version ( )
inline

Inherited from the original SYCLomatic compatibility headers.

Returns
compiler's SYCL version if defined, 202000 otherwise.

Definition at line 501 of file util.hpp.

◆ int_as_queue_ptr()

queue_ptr syclcompat::int_as_queue_ptr ( uintptr_t  x)
inline

If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr.

Definition at line 923 of file util.hpp.

◆ invoke_kernel_function()

static void syclcompat::invoke_kernel_function ( kernel_function function,
sycl::queue queue,
sycl::range< 3 >  group_range,
sycl::range< 3 >  local_range,
unsigned int  local_mem_size,
void **  kernel_params,
void **  extra 
)
inlinestatic

Invoke a kernel function.

Parameters
[in]functionkernel function.
[in]queueSYCL queue used to execute kernel
[in]group_rangeSYCL group range
[in]local_rangeSYCL local range
[in]local_mem_sizeThe size of local memory required by the kernel function.
[in]kernel_paramsArray of pointers to kernel arguments.
[in]extraExtra arguments.

Definition at line 459 of file kernel.hpp.

◆ isnan()

template<typename ValueT >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::isnan ( const ValueT  a)
inline

Determine whether 2 element value is NaN.

Parameters
[in]aThe input value
Returns
the comparison result

Definition at line 408 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by syclcompat::detail::isnan().

◆ launch() [1/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
Args...  args 
)

Definition at line 136 of file launch.hpp.

References get_default_queue().

◆ launch() [2/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 218 of file launch.hpp.

References get_default_queue().

◆ launch() [3/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 196 of file launch.hpp.

◆ launch() [4/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
sycl::queue  q,
Args...  args 
)

Definition at line 130 of file launch.hpp.

◆ launch() [5/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
Args...  args 
)

Definition at line 123 of file launch.hpp.

References get_default_queue().

◆ launch() [6/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 175 of file launch.hpp.

References get_default_queue().

◆ launch() [7/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
qThe SYCL queue on which to execute the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 155 of file launch.hpp.

◆ launch() [8/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
sycl::queue  q,
Args...  args 
)

Definition at line 117 of file launch.hpp.

◆ length()

template<typename ValueT >
ValueT syclcompat::length ( const ValueT *  a,
const int  len 
)
inline

Calculate the square root of the input array.

Parameters
[in]aThe array pointer
[in]lenLength of the array
Returns
The square root

Definition at line 161 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

Referenced by syclcompat::device_info::set_name(), and sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::writeRegion().

◆ load_kernel_library()

static kernel_library syclcompat::load_kernel_library ( const std::string &  name)
inlinestatic

Load kernel library and return a handle to use the library.

Parameters
[in]nameThe name of the library.

Definition at line 386 of file kernel.hpp.

References syclcompat::detail::load_dl_from_data().

◆ load_kernel_library_mem()

static kernel_library syclcompat::load_kernel_library_mem ( char const *const  image)
inlinestatic

Load kernel library whose image is already in memory and return a handle to use the library.

Parameters
[in]imageA pointer to the image in memory.

Definition at line 400 of file kernel.hpp.

References syclcompat::detail::get_lib_size(), and syclcompat::detail::load_dl_from_data().

◆ local_mem()

template<typename AllocT >
auto* syclcompat::local_mem ( )

Definition at line 69 of file memory.hpp.

Referenced by syclcompat::detail::launch().

◆ malloc() [1/3]

static void* syclcompat::malloc ( size_t &  pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 2D array on the device.

Parameters
[out]pitchAligned size of x in bytes.
xRange in dim x.
yRange in dim y.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 527 of file memory.hpp.

Referenced by syclcompat::device_memory< T, Memory, Dimension >::device_memory(), and malloc().

◆ malloc() [2/3]

static void* syclcompat::malloc ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the device.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
countNumber of elements to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 453 of file memory.hpp.

References malloc().

◆ malloc() [3/3]

static pitched_data syclcompat::malloc ( sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 3D array on the device.

Parameters
sizeSize of the memory block, in bytes.
qQueue to execute the allocate task.
Returns
A pitched_data object which stores the memory info.

Definition at line 511 of file memory.hpp.

References sycl::_V1::detail::array< dimensions >::get(), malloc(), syclcompat::pitched_data::set_data_ptr(), and syclcompat::pitched_data::set_pitch().

◆ malloc_host()

static void* syclcompat::malloc_host ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the host.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 472 of file memory.hpp.

◆ malloc_shared()

static void* syclcompat::malloc_shared ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block of usm_shared memory.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 492 of file memory.hpp.

◆ match_all_over_sub_group()

template<typename T >
unsigned int syclcompat::match_all_over_sub_group ( sycl::sub_group  g,
unsigned  member_mask,
T  value,
int *  pred 
)

The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_all_over_sub_group returns member_mask and sets the predicate pred to 1 if the values provided by all work-items in member_mask are equal; otherwise it returns 0 and sets pred to 0. The n-th bit of member_mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]member_maskInput mask
[in]valueInput value
[out]predOutput predicate
Returns
The result

Definition at line 560 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ match_any_over_sub_group()

template<typename T >
unsigned int syclcompat::match_any_over_sub_group ( sycl::sub_group  g,
unsigned  member_mask,
T  value 
)

The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_any_over_sub_group returns a mask in which some bits are set to 1, indicating that the values provided by the work-items represented by these bits are equal. The n-th bit of the mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]member_maskInput mask
[in]valueInput value
Returns
The result

Definition at line 521 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ max() [1/3]

sycl::half syclcompat::max ( sycl::half  a,
sycl::half  b 
)

Definition at line 458 of file math.hpp.

Referenced by syclcompat::detail::extend_binary().

◆ max() [2/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max ( ValueT  a,
ValueU  b 
)

Definition at line 446 of file math.hpp.

◆ max() [3/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max ( ValueT  a,
ValueU  b 
)

Definition at line 454 of file math.hpp.

◆ memcpy() [1/4]

static void syclcompat::memcpy ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 684 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::init(), memcpy(), and memcpy_async().

◆ memcpy() [2/4]

template<typename T >
static void syclcompat::memcpy ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 621 of file memory.hpp.

References memcpy().

◆ memcpy() [3/4]

static void syclcompat::memcpy ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 573 of file memory.hpp.

References memcpy().

Referenced by syclcompat::detail::memcpy().

◆ memcpy() [4/4]

static void syclcompat::memcpy ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 643 of file memory.hpp.

References memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy_async() [1/4]

static sycl::event syclcompat::memcpy_async ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size. The return of the function does NOT guarantee the copy is completed.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 703 of file memory.hpp.

References syclcompat::detail::combine_events(), and memcpy().

◆ memcpy_async() [2/4]

template<typename T >
static sycl::event syclcompat::memcpy_async ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 604 of file memory.hpp.

References memcpy().

◆ memcpy_async() [3/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 587 of file memory.hpp.

References memcpy().

◆ memcpy_async() [4/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 664 of file memory.hpp.

References syclcompat::detail::combine_events(), and memcpy().

◆ memset() [1/3]

static void syclcompat::memset ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets value to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to be set. The function will return after the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to be set.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 915 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

Referenced by memset(), memset_async(), memset_d16(), memset_d16_async(), memset_d32(), and memset_d32_async().

◆ memset() [2/3]

static void syclcompat::memset ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously sets value to the first size bytes starting from dev_ptr.

The function will return after the memset operation is completed.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 752 of file memory.hpp.

References memset().

◆ memset() [3/3]

static void syclcompat::memset ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 824 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_async() [1/3]

static sycl::event syclcompat::memset_async ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets value to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to be set. The return of the function does NOT guarantee the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to be set.
qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 929 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_async() [2/3]

static sycl::event syclcompat::memset_async ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
Returns
An event representing the memset operation.

Definition at line 785 of file memory.hpp.

References memset().

◆ memset_async() [3/3]

static sycl::event syclcompat::memset_async ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 866 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_d16() [1/2]

static void syclcompat::memset_d16 ( void *  dev_ptr,
unsigned short  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.

Definition at line 763 of file memory.hpp.

◆ memset_d16() [2/2]

static void syclcompat::memset_d16 ( void *  ptr,
size_t  pitch,
unsigned short  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 837 of file memory.hpp.

References memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d16_async() [1/2]

static sycl::event syclcompat::memset_d16_async ( void *  dev_ptr,
unsigned short  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 798 of file memory.hpp.

◆ memset_d16_async() [2/2]

static sycl::event syclcompat::memset_d16_async ( void *  ptr,
size_t  pitch,
unsigned short  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 884 of file memory.hpp.

References syclcompat::detail::combine_events(), and memset().

◆ memset_d32() [1/2]

static void syclcompat::memset_d32 ( void *  dev_ptr,
unsigned int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.

Definition at line 774 of file memory.hpp.

◆ memset_d32() [2/2]

static void syclcompat::memset_d32 ( void *  ptr,
size_t  pitch,
unsigned int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 851 of file memory.hpp.

References memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d32_async() [1/2]

static sycl::event syclcompat::memset_d32_async ( void *  dev_ptr,
unsigned int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 811 of file memory.hpp.

◆ memset_d32_async() [2/2]

static sycl::event syclcompat::memset_d32_async ( void *  ptr,
size_t  pitch,
unsigned int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 900 of file memory.hpp.

References syclcompat::detail::combine_events(), and memset().

◆ min() [1/3]

sycl::half syclcompat::min ( sycl::half  a,
sycl::half  b 
)

Definition at line 441 of file math.hpp.

Referenced by syclcompat::detail::extend_binary().

◆ min() [2/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min ( ValueT  a,
ValueU  b 
)

Definition at line 429 of file math.hpp.

◆ min() [3/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min ( ValueT  a,
ValueU  b 
)

Definition at line 437 of file math.hpp.

◆ operator*()

dim3 syclcompat::operator* ( const dim3 a,
const dim3 b 
)
inline

Definition at line 60 of file dims.hpp.

◆ operator+()

dim3 syclcompat::operator+ ( const dim3 a,
const dim3 b 
)
inline

Definition at line 64 of file dims.hpp.

◆ operator-()

dim3 syclcompat::operator- ( const dim3 a,
const dim3 b 
)
inline

Definition at line 68 of file dims.hpp.

◆ permute_sub_group_by_xor()

template<typename T >
T syclcompat::permute_sub_group_by_xor ( sycl::sub_group  g,
T  x,
unsigned int  mask,
int  logical_sub_group_size = 32 
)

permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is bitwise exclusive OR of the caller's id and mask. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]maskInput mask
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 297 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ pow() [1/4]

double syclcompat::pow ( const double  a,
const int  b 
)
inline

Definition at line 500 of file math.hpp.

◆ pow() [2/4]

float syclcompat::pow ( const float  a,
const int  b 
)
inline

Definition at line 499 of file math.hpp.

◆ pow() [3/4]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT>, ValueT> syclcompat::pow ( const ValueT  a,
const ValueU  b 
)
inline

Definition at line 504 of file math.hpp.

References pow().

◆ pow() [4/4]

template<typename ValueT , typename ValueU >
std::enable_if_t<!std::is_floating_point_v<ValueT>, double> syclcompat::pow ( const ValueT  a,
const ValueU  b 
)
inline

Definition at line 513 of file math.hpp.

Referenced by pow().

◆ relu() [1/3]

template<class ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::marray<ValueT, 2> > syclcompat::relu ( const sycl::marray< ValueT, 2 >  a)
inline

Definition at line 540 of file math.hpp.

References relu().

◆ relu() [2/3]

template<class ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::vec<ValueT, 2> > syclcompat::relu ( const sycl::vec< ValueT, 2 >  a)
inline

Definition at line 533 of file math.hpp.

References relu().

◆ relu() [3/3]

template<typename ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::relu ( const ValueT  a)
inline

Performs relu saturation.

Parameters
[in]aThe input value
Returns
the relu saturation result

Definition at line 524 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by relu().

◆ reverse_bits()

template<typename T >
T syclcompat::reverse_bits ( T  a)
inline

Reverse the bit order of an unsigned integer.

Parameters
[in]aInput unsigned integer value
Returns
Value of a with the bit order reversed

Definition at line 160 of file util.hpp.

◆ select_device() [1/2]

template<class DeviceSelector >
static std::enable_if_t< std::is_invocable_r_v<int, DeviceSelector, const sycl::device &> > syclcompat::select_device ( const DeviceSelector &  selector = sycl::gpu_selector_v)
inlinestatic

◆ select_device() [2/2]

static unsigned int syclcompat::select_device ( unsigned int  id)
inlinestatic

◆ select_from_sub_group()

template<typename T >
T syclcompat::select_from_sub_group ( sycl::sub_group  g,
T  x,
int  remote_local_id,
int  logical_sub_group_size = 32 
)

select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from another work-item whose id is remote_local_id. If remote_local_id is outside the logical sub_group id range, remote_local_id is taken modulo logical_sub_group_size. The logical_sub_group_size must be a power of 2 and not exceed the input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]remote_local_idInput source work item id
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 217 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ set_default_queue()

static void syclcompat::set_default_queue ( const sycl::queue q)
inlinestatic

Util function to change the default queue of the current device in the device manager. If the device extension's saved queue is the default queue, the previously saved queue will be overwritten as well.

This function will be blocking if there are submitted kernels in the previous default queue.

Parameters
qNew user-defined queue

Definition at line 755 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::device_ext::set_default_queue().

◆ shift_sub_group_left()

template<typename T >
T syclcompat::shift_sub_group_left ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from another work-item whose id is the caller's id plus delta. If the calculated id is outside the logical sub_group id range, the work-item will get the value from itself. The logical_sub_group_size must be a power of 2 and not exceed the input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]deltaInput delta
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 241 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ shift_sub_group_right()

template<typename T >
T syclcompat::shift_sub_group_right ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from another work-item whose id is the caller's id minus delta. If the calculated id is outside the logical sub_group id range, the work-item will get the value from itself. The logical_sub_group_size must be a power of 2 and not exceed the input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]deltaInput delta
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 269 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ unload_kernel_library()

static void syclcompat::unload_kernel_library ( const kernel_library library)
inlinestatic

Unload kernel library.

Parameters
[in,out]libraryHandle to the library to be closed.

Definition at line 408 of file kernel.hpp.

◆ unordered_compare() [1/2]

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::unordered_compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs unordered comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 220 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by unordered_compare(), unordered_compare_both(), and unordered_compare_mask().

◆ unordered_compare() [2/2]

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::unordered_compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element unordered comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 232 of file math.hpp.

References unordered_compare().

◆ unordered_compare_both()

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, bool> syclcompat::unordered_compare_both ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element unordered comparison and return true if both results are true.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 257 of file math.hpp.

References unordered_compare().

◆ unordered_compare_mask()

template<typename ValueT , class BinaryOperation >
unsigned syclcompat::unordered_compare_mask ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const BinaryOperation  binary_op 
)
inline

Performs a 2-element unordered comparison. The comparison result for each element is 0 (false) or 0xffff (true); the function returns an unsigned int composed of the comparison results of the two elements.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 287 of file math.hpp.

References unordered_compare().

◆ vectorized_binary()

template<typename VecT , class BinaryOperation >
unsigned syclcompat::vectorized_binary ( unsigned  a,
unsigned  b,
const BinaryOperation  binary_op 
)
inline

Compute vectorized binary operation value for two values, with each value treated as a vector type VecT.

Template Parameters
[in]VecT The type of the vector
[in]BinaryOperation The binary operation class
Parameters
[in]aThe first value
[in]bThe second value
Returns
The vectorized binary operation value of the two values

Definition at line 684 of file math.hpp.

◆ vectorized_isgreater()

template<typename S , typename T >
T syclcompat::vectorized_isgreater ( a,
b 
)
inline

Compute vectorized isgreater for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized greater than of the two values

Definition at line 367 of file math.hpp.

◆ vectorized_isgreater< sycl::ushort2, unsigned >()

template<>
unsigned syclcompat::vectorized_isgreater< sycl::ushort2, unsigned > ( unsigned  a,
unsigned  b 
)
inline

Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short.

Parameters
[in]aThe first value
[in]bThe second value
Returns
The vectorized greater than of the two values

Definition at line 382 of file math.hpp.

◆ vectorized_max()

template<typename S , typename T >
T syclcompat::vectorized_max ( a,
b 
)
inline

Compute vectorized max for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized max of the two values

Definition at line 301 of file math.hpp.

◆ vectorized_min()

template<typename S , typename T >
T syclcompat::vectorized_min ( a,
b 
)
inline

Compute vectorized min for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized min of the two values

Definition at line 317 of file math.hpp.

◆ vectorized_sum_abs_diff()

template<typename VecT >
unsigned syclcompat::vectorized_sum_abs_diff ( unsigned  a,
unsigned  b 
)
inline

Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type VecT.

Template Parameters
[in]VecT The type of the vector
Parameters
[in]aThe first value
[in]bThe second value
Returns
The vectorized absolute difference of the two values

Definition at line 348 of file math.hpp.

References sycl::_V1::ext::intel::esimd::detail::sum().

◆ vectorized_unary()

template<typename VecT , class UnaryOperation >
unsigned syclcompat::vectorized_unary ( unsigned  a,
const UnaryOperation  unary_op 
)
inline

Compute vectorized unary operation for a value, with the value treated as a vector type VecT.

Template Parameters
[in]VecT The type of the vector
[in]UnaryOperation The unary operation class
Parameters
[in]aThe input value
Returns
The vectorized unary operation value of the input value

Definition at line 333 of file math.hpp.

◆ wait()

static void syclcompat::wait ( sycl::queue  q = get_default_queue())
inlinestatic

Definition at line 759 of file device.hpp.

Referenced by syclcompat::device_ext::~device_ext().

◆ wait_and_throw()

static void syclcompat::wait_and_throw ( sycl::queue  q = get_default_queue())
inlinestatic

Definition at line 761 of file device.hpp.

◆ wg_barrier()

void syclcompat::wg_barrier ( )
inline

Definition at line 31 of file id_query.hpp.