DPC++ Runtime
Runtime libraries for oneAPI DPC++
syclcompat Namespace Reference

Namespaces

 detail
 Atomic extension to implement standard APIs in std::atomic.
 
 experimental
 
 global_id
 
 global_range
 
 local_id
 
 local_range
 
 work_group_id
 
 work_group_range
 

Classes

class  atomic
 
class  device_info
 
class  device_ext
 device extension More...
 
class  dim3
 
struct  kernel_function_info
 
class  kernel_library
 
class  kernel_function
 
struct  abs
 A sycl::abs wrapper functor. More...
 
struct  abs_diff
 A sycl::abs_diff wrapper functor. More...
 
struct  add_sat
 A sycl::add_sat wrapper functor. More...
 
struct  rhadd
 A sycl::rhadd wrapper functor. More...
 
struct  hadd
 A sycl::hadd wrapper functor. More...
 
struct  maximum
 A sycl::max wrapper functor. More...
 
struct  minimum
 A sycl::min wrapper functor. More...
 
struct  sub_sat
 A sycl::sub_sat wrapper functor. More...
 
class  pitched_data
 Pitched 2D/3D memory data. More...
 
class  accessor
 accessor used as device function parameter. More...
 
class  accessor< T, Memory, 3 >
 
class  accessor< T, Memory, 2 >
 
class  device_memory
 Device variable with address space of shared or global. More...
 
class  device_memory< T, Memory, 0 >
 
class  pointer_attributes
 
struct  type_identity
 
struct  arith
 
class  args_selector
 
class  args_selector< n_nondefault_params, n_default_params, R(Ts...)>
 args_selector is a helper class for extracting arguments from an array of pointers to arguments or buffer of arguments to pass to a kernel function. More...
 

Typedefs

using event_ptr = sycl::event *
 
using queue_ptr = sycl::queue *
 
using device_ptr = char *
 
typedef void(* kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)
 
template<typename T1 , typename T2 >
using dot_product_acc_t = std::conditional_t< std::is_unsigned_v< T1 > &&std::is_unsigned_v< T2 >, uint32_t, int32_t >
 
using byte_t = uint8_t
 
template<class T , size_t Dimension>
using global_memory = device_memory< T, memory_region::global, Dimension >
 
template<class T , size_t Dimension>
using constant_memory = device_memory< T, memory_region::constant, Dimension >
 
template<class T , size_t Dimension>
using shared_memory = device_memory< T, memory_region::usm_shared, Dimension >
 
template<class T >
using type_identity_t = typename type_identity< T >::type
 
template<typename T >
using arith_t = typename arith< T >::type
 
using err0 = detail::generic_error_type< struct err0_tag, int >
 
using err1 = detail::generic_error_type< struct err1_tag, int >
 

Enumerations

enum  error_code { SUCCESS = 0 , BACKEND_ERROR = 1 , DEFAULT_ERROR = 999 }
 
enum class  memory_region { global = 0 , constant , local , usm_shared }
 
enum class  target { device , local }
 

Functions

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_add (T *addr, arith_t< T > operand)
 Atomically add the value operand to the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_sub (T *addr, arith_t< T > operand)
 Atomically subtract the value operand from the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_and (T *addr, type_identity_t< T > operand)
 Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_or (T *addr, type_identity_t< T > operand)
 Atomically or the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_xor (T *addr, type_identity_t< T > operand)
 Atomically xor the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_min (T *addr, type_identity_t< T > operand)
 Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_max (T *addr, type_identity_t< T > operand)
 Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_dec (unsigned int *addr, unsigned int operand)
 Atomically set the value stored in addr to operand if the old value stored in addr is equal to zero or greater than operand, else decrease the value stored in addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_inc (unsigned int *addr, unsigned int operand)
 Atomically increment the value stored in addr if the old value stored in addr is less than operand, else set the value stored in addr to 0. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_exchange (T *addr, type_identity_t< T > operand)
 Atomically exchange the value at the address addr with the value operand. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (sycl::multi_ptr< T, addressSpace > addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (T *addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
static void destroy_event (event_ptr event)
 Destroy event pointed memory. More...
 
static int get_major_version (const sycl::device &dev)
 
static int get_minor_version (const sycl::device &dev)
 
static sycl::queue create_queue (bool print_on_async_exceptions=false, bool in_order=true)
 
static sycl::queue get_default_queue ()
 Util function to get the default queue of current device in device manager. More...
 
static void set_default_queue (const sycl::queue &q)
 Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well. More...
 
static void wait (sycl::queue q=get_default_queue())
 
static void wait_and_throw (sycl::queue q=get_default_queue())
 
static unsigned int get_current_device_id ()
 Util function to get the id of current device in device manager. More...
 
static device_ext & get_current_device ()
 Util function to get the current device. More...
 
static device_ext & get_device (unsigned int id)
 Util function to get a device by id. More...
 
static sycl::context get_default_context ()
 Util function to get the context of the default queue of current device in device manager. More...
 
static device_ext & cpu_device ()
 Util function to get a CPU device. More...
 
static void filter_device (const std::vector< std::string > &dev_subnames)
 Filter out devices; only keep the devices whose names contain one of the subnames in dev_subnames. More...
 
static void list_devices ()
 List all the devices with their ids in dev_mgr. More...
 
static unsigned int select_device (unsigned int id)
 
template<class DeviceSelector >
static std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > > select_device (const DeviceSelector &selector=sycl::gpu_selector_v)
 
static unsigned int get_device_id (const sycl::device &dev)
 
static unsigned int device_count ()
 
dim3 operator* (const dim3 &a, const dim3 &b)
 
dim3 operator+ (const dim3 &a, const dim3 &b)
 
dim3 operator- (const dim3 &a, const dim3 &b)
 
void wg_barrier ()
 
static void get_kernel_function_info (kernel_function_info *kernel_info, const void *function)
 
static kernel_function_info get_kernel_function_info (const void *function)
 
static kernel_library load_kernel_library (const std::string &name)
 Load kernel library and return a handle to use the library. More...
 
static kernel_library load_kernel_library_mem (char const *const image)
 Load kernel library whose image is already in memory and return a handle to use the library. More...
 
static void unload_kernel_library (const kernel_library &library)
 Unload kernel library. More...
 
static kernel_function get_kernel_function (kernel_library &library, const std::string &name)
 Find kernel function in a kernel library and return its address. More...
 
static void invoke_kernel_function (kernel_function &function, sycl::queue &queue, sycl::range< 3 > group_range, sycl::range< 3 > local_range, unsigned int local_mem_size, void **kernel_params, void **extra)
 Invoke a kernel function. More...
 
template<int Dim>
sycl::nd_range< Dim > compute_nd_range (sycl::range< Dim > global_size_in, sycl::range< Dim > work_group_size)
 
sycl::nd_range< 1 > compute_nd_range (int global_size_in, int work_group_size)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, sycl::queue q, Args... args)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, Args... args)
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue. More...
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue. More...
 
template<typename T >
T bfe_safe (const T source, const uint32_t bit_start, const uint32_t num_bits)
 Bitfield-extract with boundary checking. More...
 
template<typename T >
T bfi_safe (const T x, const T y, const uint32_t bit_start, const uint32_t num_bits)
 Bitfield-insert with boundary checking. More...
 
unsigned int funnelshift_l (unsigned int low, unsigned int high, unsigned int shift)
 Emulated function for __funnelshift_l. More...
 
unsigned int funnelshift_lc (unsigned int low, unsigned int high, unsigned int shift)
 Emulated function for __funnelshift_lc. More...
 
unsigned int funnelshift_r (unsigned int low, unsigned int high, unsigned int shift)
 Emulated function for __funnelshift_r. More...
 
unsigned int funnelshift_rc (unsigned int low, unsigned int high, unsigned int shift)
 Emulated function for __funnelshift_rc. More...
 
float fast_length (const float *a, int len)
 Compute fast_length for variable-length array. More...
 
template<typename ValueT >
ValueT length (const ValueT *a, const int len)
 Calculate the square root of the input array. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool > compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs comparison. More...
 
template<typename ValueT >
std::enable_if_t< std::is_same_v< std::invoke_result_t< std::not_equal_to<>, ValueT, ValueT >, bool >, bool > compare (const ValueT a, const ValueT b, const std::not_equal_to<> binary_op)
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT > compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool > unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs unordered comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT > unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element unordered comparison. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool > compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element comparison and return true if both results are true. More...
 
template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool > unordered_compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
 Performs 2 element unordered comparison and return true if both results are true. More...
 
template<typename ValueT , class BinaryOperation >
unsigned compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
 Performs 2 elements comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...
 
template<typename ValueT , class BinaryOperation >
unsigned unordered_compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
 Performs 2 elements unordered comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...
 
template<typename S , typename T >
T vectorized_max (T a, T b)
 Compute vectorized max for two values, with each value treated as a vector type S. More...
 
template<typename S , typename T >
T vectorized_min (T a, T b)
 Compute vectorized min for two values, with each value treated as a vector type S. More...
 
template<typename VecT , class UnaryOperation >
unsigned vectorized_unary (unsigned a, const UnaryOperation unary_op)
 Compute vectorized unary operation for a value, with the value treated as a vector type VecT. More...
 
template<typename VecT >
unsigned vectorized_sum_abs_diff (unsigned a, unsigned b)
 Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type VecT. More...
 
template<typename S , typename T >
T vectorized_isgreater (T a, T b)
 Compute vectorized isgreater for two values, with each value treated as a vector type S. More...
 
template<>
unsigned vectorized_isgreater< sycl::ushort2, unsigned > (unsigned a, unsigned b)
 Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short. More...
 
template<typename ValueT >
ValueT clamp (ValueT val, ValueT min_val, ValueT max_val)
 Returns min(max(val, min_val), max_val). More...
 
template<typename ValueT >
std::enable_if_t< ValueT::size()==2, ValueT > isnan (const ValueT a)
 Determine whether 2 element value is NaN. More...
 
template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT > cbrt (ValueT val)
 cbrt function wrapper. More...
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > > min (ValueT a, ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > > min (ValueT a, ValueU b)
 
sycl::half min (sycl::half a, sycl::half b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > > max (ValueT a, ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > > max (ValueT a, ValueU b)
 
sycl::half max (sycl::half a, sycl::half b)
 
template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU > fmax_nan (const ValueT a, const ValueU b)
 Performs 2 elements comparison and returns the bigger one. More...
 
template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 > fmax_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)
 
template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU > fmin_nan (const ValueT a, const ValueU b)
 Performs 2 elements comparison and returns the smaller one. More...
 
template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 > fmin_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)
 
float pow (const float a, const int b)
 
double pow (const double a, const int b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT >, ValueT > pow (const ValueT a, const ValueU b)
 
template<typename ValueT , typename ValueU >
std::enable_if_t<!std::is_floating_point_v< ValueT >, double > pow (const ValueT a, const ValueU b)
 
template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT > relu (const ValueT a)
 Performs relu saturation. More...
 
template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::vec< ValueT, 2 > > relu (const sycl::vec< ValueT, 2 > a)
 
template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::marray< ValueT, 2 > > relu (const sycl::marray< ValueT, 2 > a)
 
template<typename T >
sycl::vec< T, 2 > cmul (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the multiplication of two complex numbers. More...
 
template<typename T >
sycl::vec< T, 2 > cdiv (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the division of two complex numbers. More...
 
template<typename T >
T cabs (sycl::vec< T, 2 > x)
 Computes the magnitude of a complex number. More...
 
template<typename T >
sycl::vec< T, 2 > conj (sycl::vec< T, 2 > x)
 Computes the complex conjugate of a complex number. More...
 
template<typename ValueT >
sycl::vec< ValueT, 2 > cmul_add (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const sycl::vec< ValueT, 2 > c)
 Performs complex number multiply addition. More...
 
template<typename ValueT >
sycl::marray< ValueT, 2 > cmul_add (const sycl::marray< ValueT, 2 > a, const sycl::marray< ValueT, 2 > b, const sycl::marray< ValueT, 2 > c)
 
template<typename VecT , class BinaryOperation >
unsigned vectorized_binary (unsigned a, unsigned b, const BinaryOperation binary_op)
 Compute vectorized binary operation value for two values, with each value treated as a vector type VecT. More...
 
template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 > dp2a_lo (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
 Two-way dot product-accumulate. More...
 
template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 > dp2a_hi (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
 Two-way dot product-accumulate. More...
 
template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 > dp4a (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
 Four-way byte dot product-accumulate. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_add (AT a, BT b)
 Extend a and b to 33 bit and add them. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_add (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, add a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_add_sat (AT a, BT b)
 Extend a and b to 33 bit and add them with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_add_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, add a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_sub (AT a, BT b)
 Extend a and b to 33 bits and subtract them. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_sub (AT a, BT b, CT c, BinaryOperation second_op)
 Extend inputs to 33 bits, subtract b from a, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_sub_sat (AT a, BT b)
 Extend a and b to 33 bits and subtract them with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_sub_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend inputs to 33 bits, subtract b from a with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_absdiff (AT a, BT b)
 Extend a and b to 33 bit and do abs_diff. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_absdiff (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, abs_diff a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_absdiff_sat (AT a, BT b)
 Extend a and b to 33 bit and do abs_diff with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_absdiff_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, abs_diff a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_min (AT a, BT b)
 Extend a and b to 33 bit and return smaller one. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_min (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the smaller one in a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_min_sat (AT a, BT b)
 Extend a and b to 33 bit and return smaller one with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_min_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the smaller one in a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_max (AT a, BT b)
 Extend a and b to 33 bit and return bigger one. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_max (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the bigger one in a, b, then do second_op with c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_max_sat (AT a, BT b)
 Extend a and b to 33 bit and return bigger one with saturation. More...
 
template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT extend_max_sat (AT a, BT b, CT c, BinaryOperation second_op)
 Extend Inputs to 33 bit, find the bigger one in a, b with saturation, then do second_op with c. More...
 
template<typename RetT , typename T >
constexpr RetT extend_shl_clamp (T a, uint32_t b)
 Extend a and b to 33 bit and return a << clamp(b, 0, 32). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shl_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(a << clamp(b, 0, 32), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shl_sat_clamp (T a, uint32_t b)
 Extend a and b to 33 bit and return sat(a << clamp(b, 0, 32)). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shl_sat_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(sat(a << clamp(b, 0, 32)), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shl_wrap (T a, uint32_t b)
 Extend a and b to 33 bit and return a << (b & 0x1F). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shl_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(a << (b & 0x1F), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shl_sat_wrap (T a, uint32_t b)
 Extend a and b to 33 bit and return sat(a << (b & 0x1F)). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shl_sat_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(sat(a << (b & 0x1F)), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shr_clamp (T a, uint32_t b)
 Extend a and b to 33 bit and return a >> clamp(b, 0, 32). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shr_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(a >> clamp(b, 0, 32), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shr_sat_clamp (T a, uint32_t b)
 Extend a and b to 33 bit and return sat(a >> clamp(b, 0, 32)). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shr_sat_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(sat(a >> clamp(b, 0, 32)), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shr_wrap (T a, uint32_t b)
 Extend a and b to 33 bit and return a >> (b & 0x1F). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shr_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(a >> (b & 0x1F), c). More...
 
template<typename RetT , typename T >
constexpr RetT extend_shr_sat_wrap (T a, uint32_t b)
 Extend a and b to 33 bit and return sat(a >> (b & 0x1F)). More...
 
template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT extend_shr_sat_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
 Extend Inputs to 33 bit, and return second_op(sat(a >> (b & 0x1F)), c). More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd2 (AT a, BT b, RetT c)
 Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd2_add (AT a, BT b, RetT c)
 Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd2_sat (AT a, BT b, RetT c)
 Compute vectorized addition of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub2 (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub2_add (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub2_sat (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff2 (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff2_add (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff2_sat (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin2 (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin2_add (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin2_sat (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax2 (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax2_add (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax2_sat (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg2 (AT a, BT b, RetT c)
 Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg2_add (AT a, BT b, RetT c)
 Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg2_sat (AT a, BT b, RetT c)
 Compute vectorized average of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...
 
template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned extend_vcompare2 (AT a, BT b, BinaryOperation cmp)
 Extend a and b to 33 bit and perform a vectorized comparison of the input values using the specified comparison cmp. More...
 
template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned extend_vcompare2_add (AT a, BT b, unsigned c, BinaryOperation cmp)
 Extend inputs to 33 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd4 (AT a, BT b, RetT c)
 Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd4_add (AT a, BT b, RetT c)
 Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vadd4_sat (AT a, BT b, RetT c)
 Compute vectorized addition of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub4 (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub4_add (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vsub4_sat (AT a, BT b, RetT c)
 Compute vectorized subtraction of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff4 (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff4_add (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vabsdiff4_sat (AT a, BT b, RetT c)
 Compute vectorized abs_diff of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin4 (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin4_add (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmin4_sat (AT a, BT b, RetT c)
 Compute vectorized minimum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax4 (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax4_add (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vmax4_sat (AT a, BT b, RetT c)
 Compute vectorized maximum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg4 (AT a, BT b, RetT c)
 Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg4_add (AT a, BT b, RetT c)
 Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename RetT , typename AT , typename BT >
constexpr RetT extend_vavrg4_sat (AT a, BT b, RetT c)
 Compute vectorized average of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...
 
template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned extend_vcompare4 (AT a, BT b, BinaryOperation cmp)
 Extend a and b to 33 bit and perform a vectorized comparison of the input values using the specified comparison cmp. More...
 
template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned extend_vcompare4_add (AT a, BT b, unsigned c, BinaryOperation cmp)
 Extend inputs to 33 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c. More...
 
template<typename AllocT >
auto * local_mem ()
 
static void * malloc (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the device. More...
 
static void * malloc_host (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the host. More...
 
static void * malloc_shared (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block of usm_shared memory. More...
 
static pitched_data malloc (sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Allocate memory block for 3D array on the device. More...
 
static void * malloc (size_t &pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Allocate memory block for 2D array on the device. More...
 
static void wait_and_free (void *ptr, sycl::queue q=get_default_queue())
 Wait on the queue q and free the memory ptr. More...
 
static void free (void *ptr, sycl::queue q=get_default_queue())
 Free the memory ptr on the default queue without synchronizing. More...
 
sycl::event enqueue_free (const std::vector< void * > &pointers, const std::vector< sycl::event > &events, sycl::queue q=get_default_queue())
 Enqueues the release of all pointers in pointers on the queue q. More...
 
static void memcpy (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static sycl::event memcpy_async (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static void memcpy (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
static void memcpy (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static void memcpy (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Synchronously copies a subset of the 3D matrix specified by from into the 3D matrix specified by to. More...
 
static sycl::event memcpy_async (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Asynchronously copies a subset of the 3D matrix specified by from into the 3D matrix specified by to. More...
 
template<class T >
static void fill (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Synchronously sets pattern to the first count elements starting from dev_ptr. More...
 
template<class T >
static sycl::event fill_async (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Asynchronously sets pattern to the first count elements starting from dev_ptr. More...
 
static void memset (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Synchronously sets value to the first size bytes starting from dev_ptr. More...
 
static void memset_d16 (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
 Sets 2 bytes data value to the first size elements starting from dev_ptr in q synchronously. More...
 
static void memset_d32 (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
 Sets 4 bytes data value to the first size elements starting from dev_ptr in q synchronously. More...
 
static sycl::event memset_async (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Sets 1 byte data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static sycl::event memset_d16_async (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
 Sets 2 bytes data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static sycl::event memset_d32_async (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
 Sets 4 bytes data value to the first size elements starting from dev_ptr in q asynchronously. More...
 
static void memset (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static void memset_d16 (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static void memset_d32 (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously. More...
 
static sycl::event memset_async (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static sycl::event memset_d16_async (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static sycl::event memset_d32_async (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously. More...
 
static void memset (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets val to the 3D memory region specified by pitch in q synchronously. More...
 
static sycl::event memset_async (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets val to the 3D memory region specified by pitch in q asynchronously. More...
 
int cast_double_to_int (double d, bool use_high32=true)
 Cast the high or low 32 bits of a double to an integer. More...
 
double cast_ints_to_double (int high32, int low32)
 Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double. More...
 
template<typename T >
T reverse_bits (T a)
 Reverse the bit order of an unsigned integer. More...
 
unsigned int byte_level_permute (unsigned int a, unsigned int b, unsigned int s)
 
template<typename T >
int ffs (T a)
 Find position of first least significant set bit in an integer. More...
 
template<typename T >
T select_from_sub_group (sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32)
 select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group. More...
 
template<typename T >
T shift_sub_group_left (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_left move values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left. More...
 
template<typename T >
T shift_sub_group_right (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_right move values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right. More...
 
template<typename T >
T permute_sub_group_by_xor (sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32)
 permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask. More...
 
int get_sycl_language_version ()
 Inherited from the original SYCLomatic compatibility headers. More...
 
template<typename T >
unsigned int match_any_over_sub_group (sycl::sub_group g, unsigned member_mask, T value)
 The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
 
template<typename T >
unsigned int match_all_over_sub_group (sycl::sub_group g, unsigned member_mask, T value, int *pred)
 The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group. More...
 
queue_ptr int_as_queue_ptr (uintptr_t x)
 If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr. More...
 

Typedef Documentation

◆ arith_t

template<typename T >
using syclcompat::arith_t = typedef typename arith<T>::type

Definition at line 42 of file traits.hpp.

◆ byte_t

using syclcompat::byte_t = typedef uint8_t

Definition at line 99 of file memory.hpp.

◆ constant_memory

template<class T , size_t Dimension>
using syclcompat::constant_memory = typedef device_memory<T, memory_region::constant, Dimension>

Definition at line 1283 of file memory.hpp.

◆ device_ptr

using syclcompat::device_ptr = typedef char *

Definition at line 116 of file device.hpp.

◆ dot_product_acc_t

template<typename T1 , typename T2 >
using syclcompat::dot_product_acc_t = typedef std::conditional_t<std::is_unsigned_v<T1> && std::is_unsigned_v<T2>, uint32_t, int32_t>

Definition at line 996 of file math.hpp.

◆ err0

using syclcompat::err0 = typedef detail::generic_error_type<struct err0_tag, int>

Definition at line 133 of file util.hpp.

◆ err1

using syclcompat::err1 = typedef detail::generic_error_type<struct err1_tag, int>

Definition at line 134 of file util.hpp.

◆ event_ptr

using syclcompat::event_ptr = typedef sycl::event *

Definition at line 112 of file device.hpp.

◆ global_memory

template<class T , size_t Dimension>
using syclcompat::global_memory = typedef device_memory<T, memory_region::global, Dimension>

Definition at line 1281 of file memory.hpp.

◆ kernel_functor

typedef void(* syclcompat::kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)

Definition at line 59 of file kernel.hpp.

◆ queue_ptr

using syclcompat::queue_ptr = typedef sycl::queue *

Definition at line 114 of file device.hpp.

◆ shared_memory

template<class T , size_t Dimension>
using syclcompat::shared_memory = typedef device_memory<T, memory_region::usm_shared, Dimension>

Definition at line 1285 of file memory.hpp.

◆ type_identity_t

template<class T >
using syclcompat::type_identity_t = typedef typename type_identity<T>::type

Definition at line 35 of file traits.hpp.

Enumeration Type Documentation

◆ error_code

Enumerator
SUCCESS 
BACKEND_ERROR 
DEFAULT_ERROR 

Definition at line 59 of file defs.hpp.

◆ memory_region

Enumerator
global 
constant 
local 
usm_shared 

Definition at line 90 of file memory.hpp.

◆ target

enum syclcompat::target
strong
Enumerator
device 
local 

Definition at line 97 of file memory.hpp.

Function Documentation

◆ atomic_compare_exchange_strong() [1/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( sycl::multi_ptr< T, addressSpace >  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in,out]addrMulti_ptr.
expectedThe value to compare against the value at addr.
desiredThe value to assign to addr if the value at addr is expected.
successThe memory ordering used when comparison succeeds.
failThe memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 253 of file atomic.hpp.

◆ atomic_compare_exchange_strong() [2/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( T *  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in,out]addrThe pointer to the data.
expectedThe value to compare against the value at addr.
desiredThe value to assign to addr if the value at addr is expected.
successThe memory ordering used when comparison succeeds.
failThe memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 279 of file atomic.hpp.

◆ atomic_exchange()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_exchange ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically exchange the value at the address addr with the value operand.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to be exchanged with the value pointed by addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 232 of file atomic.hpp.

◆ atomic_fetch_add()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_add ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically add the value operand to the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to add to the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 56 of file atomic.hpp.

◆ atomic_fetch_and()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_and ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise AND operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 91 of file atomic.hpp.

◆ atomic_fetch_compare_dec()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_dec ( unsigned int *  addr,
unsigned int  operand 
)

Atomically set the value stored in addr to operand if the old value stored in addr is equal to zero or greater than operand; otherwise decrement the value stored in addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe threshold value.
memoryOrderThe memory ordering used.
Returns
The old value stored in addr.

Definition at line 176 of file atomic.hpp.

◆ atomic_fetch_compare_inc()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_inc ( unsigned int *  addr,
unsigned int  operand 
)
inline

Atomically increment the value stored in addr if the old value stored in addr is less than operand; otherwise set the value stored in addr to 0.

Parameters
[in,out]addrThe pointer to the data.
operandThe threshold value.
memoryOrderThe memory ordering used.
Returns
The old value stored in addr.

Definition at line 205 of file atomic.hpp.

◆ atomic_fetch_max()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_max ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to compare with the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 160 of file atomic.hpp.

◆ atomic_fetch_min()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_min ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to compare with the value at addr.

Definition at line 143 of file atomic.hpp.

◆ atomic_fetch_or()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_or ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise OR between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise OR operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 109 of file atomic.hpp.

◆ atomic_fetch_sub()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_sub ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically subtract the value operand from the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to subtract from the value at addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 73 of file atomic.hpp.

◆ atomic_fetch_xor()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_xor ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise XOR between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out]addrThe pointer to the data.
operandThe value to use in bitwise XOR operation with the value at the addr.
memoryOrderThe memory ordering used.
Returns
The value at the addr before the call.

Definition at line 127 of file atomic.hpp.

◆ bfe_safe()

template<typename T >
T syclcompat::bfe_safe ( const T  source,
const uint32_t  bit_start,
const uint32_t  num_bits 
)
inline

Bitfield-extract with boundary checking.

Extract a bit field from source and return the zero or sign-extended result. bit_start gives the bit field starting bit position, and num_bits gives the bit field length in bits.

The result is padded with the sign bit of the extracted field. If num_bits is zero, the result is zero. If the start position is beyond the msb of the input, the result is filled with the replicated sign bit of the extracted field.

Template Parameters
TThe type of the source value, must be an integer.
Parameters
sourceThe source value to extract from.
bit_startThe position to start extracting.
num_bitsThe number of bits to extract.

Definition at line 266 of file math.hpp.

References syclcompat::detail::bfe().

◆ bfi_safe()

template<typename T >
T syclcompat::bfi_safe ( const T  x,
const T  y,
const uint32_t  bit_start,
const uint32_t  num_bits 
)
inline

Bitfield-insert with boundary checking.

Align and insert a bit field from x into y. bit_start gives the starting bit position for the insertion, and num_bits gives the bit field length in bits.

Template Parameters
TThe type of x and y, must be an unsigned integer.
Parameters
xThe source of the bitfield.
yThe source where bitfield is inserted.
bit_startThe position to start insertion.
num_bitsThe number of bits to insert.

Definition at line 357 of file math.hpp.

References syclcompat::detail::bfi().

◆ byte_level_permute()

unsigned int syclcompat::byte_level_permute ( unsigned int  a,
unsigned int  b,
unsigned int  s 
)
inline
Parameters
[in]aThe first value contains 4 bytes
[in]bThe second value contains 4 bytes
[in]sThe selector value, only the lower 16 bits are used
Returns
the permutation result of 4 bytes selected in the way specified by s from a and b

Definition at line 182 of file util.hpp.

◆ cabs()

template<typename T >
T syclcompat::cabs ( sycl::vec< T, 2 >  x)

Computes the magnitude of a complex number.

Template Parameters
TComplex element type
Parameters
[in]xThe input complex number
Returns
The result

Definition at line 849 of file math.hpp.

References sycl::_V1::ext::intel::esimd::abs().

◆ cast_double_to_int()

int syclcompat::cast_double_to_int ( double  d,
bool  use_high32 = true 
)
inline

Cast the high or low 32 bits of a double to an integer.

Parameters
[in]dThe double value.
[in]use_high32Cast the high 32 bits of the double if true; otherwise cast the low 32 bits.

Definition at line 140 of file util.hpp.

◆ cast_ints_to_double()

double syclcompat::cast_ints_to_double ( int  high32,
int  low32 
)
inline

Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double.

Parameters
[in]high32The integer as the high 32 bits
[in]low32The integer as the low 32 bits

Definition at line 152 of file util.hpp.

◆ cbrt()

template<typename ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::cbrt ( ValueT  val)
inline

cbrt function wrapper.

Definition at line 692 of file math.hpp.

◆ cdiv()

template<typename T >
sycl::vec<T, 2> syclcompat::cdiv ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the division of two complex numbers.

Template Parameters
TComplex element type
Parameters
[in]xThe first input complex number
[in]yThe second input complex number
Returns
The result

Definition at line 839 of file math.hpp.

◆ clamp()

template<typename ValueT >
ValueT syclcompat::clamp ( ValueT  val,
ValueT  min_val,
ValueT  max_val 
)
inline

Returns min(max(val, min_val), max_val)

Parameters
[in]valThe input value
[in]min_valThe minimum value
[in]max_valThe maximum value
Returns
the value between min_val and max_val

Definition at line 675 of file math.hpp.

References syclcompat::detail::clamp().

◆ cmul()

template<typename T >
sycl::vec<T, 2> syclcompat::cmul ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the multiplication of two complex numbers.

Template Parameters
TComplex element type
Parameters
[in]xThe first input complex number
[in]yThe second input complex number
Returns
The result

Definition at line 827 of file math.hpp.

◆ cmul_add() [1/2]

template<typename ValueT >
sycl::marray<ValueT, 2> syclcompat::cmul_add ( const sycl::marray< ValueT, 2 >  a,
const sycl::marray< ValueT, 2 >  b,
const sycl::marray< ValueT, 2 >  c 
)
inline

Definition at line 880 of file math.hpp.

◆ cmul_add() [2/2]

template<typename ValueT >
sycl::vec<ValueT, 2> syclcompat::cmul_add ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const sycl::vec< ValueT, 2 >  c 
)
inline

Performs complex number multiply addition.

Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
the operation result

Definition at line 870 of file math.hpp.

◆ compare() [1/3]

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 463 of file math.hpp.

Referenced by compare(), compare_both(), and compare_mask().

◆ compare() [2/3]

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element comparison.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 482 of file math.hpp.

References compare().

◆ compare() [3/3]

template<typename ValueT >
std::enable_if_t< std::is_same_v<std::invoke_result_t<std::not_equal_to<>, ValueT, ValueT>, bool>, bool> syclcompat::compare ( const ValueT  a,
const ValueT  b,
const std::not_equal_to<>  binary_op 
)
inline

Definition at line 471 of file math.hpp.

References syclcompat::detail::isnan().

◆ compare_both()

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, bool> syclcompat::compare_both ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element comparison and return true if both results are true.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 520 of file math.hpp.

References compare().

◆ compare_mask()

template<typename ValueT , class BinaryOperation >
unsigned syclcompat::compare_mask ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const BinaryOperation  binary_op 
)
inline

Performs a 2-element comparison. The comparison result of each element is 0 (false) or 0xffff (true); returns an unsigned int composed of the comparison results of the two elements.

Parameters
[in]aThe first value
[in]bThe second value
[in]binary_opfunctor that implements the binary operation
Returns
the comparison result

Definition at line 546 of file math.hpp.

References compare().

◆ compute_nd_range() [1/2]

sycl::nd_range<1> syclcompat::compute_nd_range ( int  global_size_in,
int  work_group_size 
)
inline

Definition at line 110 of file launch.hpp.

References sycl::_V1::ext::oneapi::experimental::work_group_size.

◆ compute_nd_range() [2/2]

template<int Dim>
sycl::nd_range<Dim> syclcompat::compute_nd_range ( sycl::range< Dim >  global_size_in,
sycl::range< Dim >  work_group_size 
)
inline

◆ conj()

template<typename T >
sycl::vec<T, 2> syclcompat::conj ( sycl::vec< T, 2 >  x)

Computes the complex conjugate of a complex number.

Template Parameters
TComplex element type
Parameters
[in]xThe input complex number
Returns
The result

Definition at line 858 of file math.hpp.

◆ cpu_device()

static device_ext& syclcompat::cpu_device ( )
inlinestatic

Util function to get a CPU device.

Definition at line 908 of file device.hpp.

References syclcompat::detail::dev_mgr::cpu_device(), and syclcompat::detail::dev_mgr::instance().

◆ create_queue()

static sycl::queue syclcompat::create_queue ( bool  print_on_async_exceptions = false,
bool  in_order = true 
)
inlinestatic

◆ destroy_event()

static void syclcompat::destroy_event ( event_ptr  event)
static

Destroys the memory pointed to by the event pointer.

Parameters
eventPointer to the sycl::event address.

Definition at line 121 of file device.hpp.

◆ device_count()

static unsigned int syclcompat::device_count ( )
inlinestatic

◆ dp2a_hi()

template<typename T1 , typename T2 >
dot_product_acc_t<T1, T2> syclcompat::dp2a_hi ( T1  a,
T2  b,
dot_product_acc_t< T1, T2 >  c 
)
inline

Two-way dot product-accumulate.

Calculate and return integer_vector2(a) dot product integer_vector2(high_16bit(b)) + c.
Template Parameters
[in]T1 The type of first value.
[in]T2 The type of second value.
Parameters
[in]aThe first value.
[in]bThe second value.
[in]cThe third value. It has type uint32_t if both T1 and T2 are uint32_t else has type int32_t.
Returns
Two-way 16-bit to 8-bit dot product which is accumulated in 32-bit result.

Definition at line 1059 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend2(), and syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ dp2a_lo()

template<typename T1 , typename T2 >
dot_product_acc_t<T1, T2> syclcompat::dp2a_lo ( T1  a,
T2  b,
dot_product_acc_t< T1, T2 >  c 
)
inline

Two-way dot product-accumulate.

Calculate and return integer_vector2(a) dot product integer_vector2(low16_bit(b)) + c.
Template Parameters
[in]T1 The type of first value.
[in]T2 The type of second value.
Parameters
[in]aThe first value.
[in]bThe second value.
[in]cThe third value. It has type uint32_t if both T1 and T2 are uint32_t else has type int32_t.
Returns
Two-way 16-bit to 8-bit dot product which is accumulated in 32-bit result.

Definition at line 1030 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend2(), and syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ dp4a()

template<typename T1 , typename T2 >
dot_product_acc_t<T1, T2> syclcompat::dp4a ( T1  a,
T2  b,
dot_product_acc_t< T1, T2 >  c 
)
inline

Four-way byte dot product-accumulate.

Calculate and return integer_vector4(a) dot product integer_vector4(b) + c.
Template Parameters
[in]T1 The type of first value.
[in]T2 The type of second value.
Parameters
[in]aThe first value.
[in]bThe second value.
[in]cThe third value. It has type uint32_t if both T1 and T2 are uint32_t else has type int32_t.
Returns
Four-way byte dot product which is accumulated in 32-bit result.

Definition at line 1087 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ enqueue_free()

sycl::event syclcompat::enqueue_free ( const std::vector< void * > &  pointers,
const std::vector< sycl::event > &  events,
sycl::queue  q = get_default_queue() 
)
inline

Enqueues the release of all pointers in pointers on the queue q.

The command waits on all passed events and returns an event that tracks the command's execution on the queue.

Parameters
pointersThe pointers point to the device memory requested to be freed.
eventsThe events to be waited on.
qThe sycl::queue the memory relates to.

Definition at line 648 of file memory.hpp.

◆ extend_absdiff() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_absdiff ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and do abs_diff.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend abs_diff of the two values

Definition at line 1235 of file math.hpp.

◆ extend_absdiff() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_absdiff ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend abs_diff of a, b and second_op with c

Definition at line 1253 of file math.hpp.

◆ extend_absdiff_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_absdiff_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and do abs_diff with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend abs_diff of the two values with saturation

Definition at line 1266 of file math.hpp.

◆ extend_absdiff_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_absdiff_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend abs_diff of a, b with saturation and second_op with c

Definition at line 1285 of file math.hpp.

◆ extend_add() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_add ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and add them.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend addition of the two values

Definition at line 1113 of file math.hpp.

◆ extend_add() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_add ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, add a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend addition of a, b and second_op with c

Definition at line 1130 of file math.hpp.

◆ extend_add_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_add_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and add them with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend addition of the two values with saturation

Definition at line 1142 of file math.hpp.

◆ extend_add_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_add_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, add a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend addition of a, b with saturation and second_op with c

Definition at line 1161 of file math.hpp.

◆ extend_max() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_max ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return bigger one.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The bigger one of the two extended values

Definition at line 1360 of file math.hpp.

◆ extend_max() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_max ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The bigger one of a, b and second_op with c

Definition at line 1378 of file math.hpp.

◆ extend_max_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_max_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return bigger one with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The bigger one of the two extended values with saturation

Definition at line 1390 of file math.hpp.

◆ extend_max_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_max_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The bigger one of a, b with saturation and second_op with c

Definition at line 1409 of file math.hpp.

◆ extend_min() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_min ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return smaller one.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The smaller one of the two extended values

Definition at line 1298 of file math.hpp.

◆ extend_min() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_min ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The smaller one of a, b and second_op with c

Definition at line 1316 of file math.hpp.

◆ extend_min_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_min_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and return smaller one with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The smaller one of the two extended values with saturation

Definition at line 1328 of file math.hpp.

◆ extend_min_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_min_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The smaller one of a, b with saturation and second_op with c

Definition at line 1347 of file math.hpp.

◆ extend_shl_clamp() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shl_clamp ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return a << clamp(b, 0, 32).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
a << clamp(b, 0, 32)

Definition at line 1419 of file math.hpp.

◆ extend_shl_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shl_clamp ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a << clamp(b, 0, 32), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(a << clamp(b, 0, 32), c)

Definition at line 1431 of file math.hpp.

◆ extend_shl_sat_clamp() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shl_sat_clamp ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return sat(a << clamp(b, 0, 32)).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
sat(a << clamp(b, 0, 32))

Definition at line 1442 of file math.hpp.

◆ extend_shl_sat_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shl_sat_clamp ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a << clamp(b, 0, 32)), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(sat(a << clamp(b, 0, 32)), c)

Definition at line 1454 of file math.hpp.

◆ extend_shl_sat_wrap() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shl_sat_wrap ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return sat(a << (b & 0x1F)).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
sat(a << (b & 0x1F))

Definition at line 1487 of file math.hpp.

◆ extend_shl_sat_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shl_sat_wrap ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a << (b & 0x1F)), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(sat(a << (b & 0x1F)), c)

Definition at line 1498 of file math.hpp.

◆ extend_shl_wrap() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shl_wrap ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return a << (b & 0x1F).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
a << (b & 0x1F)

Definition at line 1465 of file math.hpp.

◆ extend_shl_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shl_wrap ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a << (b & 0x1F), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(a << (b & 0x1F), c)

Definition at line 1476 of file math.hpp.

◆ extend_shr_clamp() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shr_clamp ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return a >> clamp(b, 0, 32).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
a >> clamp(b, 0, 32)

Definition at line 1509 of file math.hpp.

◆ extend_shr_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shr_clamp ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a >> clamp(b, 0, 32), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(a >> clamp(b, 0, 32), c)

Definition at line 1521 of file math.hpp.

◆ extend_shr_sat_clamp() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shr_sat_clamp ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return sat(a >> clamp(b, 0, 32)).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
sat(a >> clamp(b, 0, 32))

Definition at line 1532 of file math.hpp.

◆ extend_shr_sat_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shr_sat_clamp ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a >> clamp(b, 0, 32)), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(sat(a >> clamp(b, 0, 32)), c)

Definition at line 1544 of file math.hpp.

◆ extend_shr_sat_wrap() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shr_sat_wrap ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return sat(a >> (b & 0x1F)).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
sat(a >> (b & 0x1F))

Definition at line 1577 of file math.hpp.

◆ extend_shr_sat_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shr_sat_wrap ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a >> (b & 0x1F)), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(sat(a >> (b & 0x1F)), c)

Definition at line 1588 of file math.hpp.

◆ extend_shr_wrap() [1/2]

template<typename RetT , typename T >
constexpr RetT syclcompat::extend_shr_wrap ( T  a,
uint32_t  b 
)
inlineconstexpr

Extend a and b to 33 bit and return a >> (b & 0x1F).

Parameters
[in]aThe source value
[in]bThe offset to shift
Returns
a >> (b & 0x1F)

Definition at line 1555 of file math.hpp.

◆ extend_shr_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT syclcompat::extend_shr_wrap ( T  a,
uint32_t  b,
uint32_t  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a >> (b & 0x1F), c).

Parameters
[in]aThe source value
[in]bThe offset to shift
[in]cThe value to merge
[in]second_opThe operation to do with the third value
Returns
second_op(a >> (b & 0x1F), c)

Definition at line 1566 of file math.hpp.

◆ extend_sub() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_sub ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and subtract them.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend subtraction of the two values

Definition at line 1174 of file math.hpp.

◆ extend_sub() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_sub ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, subtract b from a, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend subtraction of a, b and second_op with c

Definition at line 1191 of file math.hpp.

◆ extend_sub_sat() [1/2]

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_sub_sat ( AT  a,
BT  b 
)
inlineconstexpr

Extend a and b to 33 bit and subtract them with saturation.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
Parameters
[in]aThe first value
[in]bThe second value
Returns
The extend subtraction of the two values with saturation

Definition at line 1203 of file math.hpp.

◆ extend_sub_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT syclcompat::extend_sub_sat ( AT  a,
BT  b,
CT  c,
BinaryOperation  second_op 
)
inlineconstexpr

Extend Inputs to 33 bit, subtract b from a with saturation, then do second_op with c.

Template Parameters
[in]RetT The type of the return value
[in]AT The type of the first value
[in]BT The type of the second value
[in]CT The type of the third value
[in]BinaryOperation The type of the second operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]second_opThe operation to do with the third value
Returns
The extend subtraction of a, b with saturation and second_op with c

Definition at line 1222 of file math.hpp.

◆ extend_vabsdiff2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized abs_diff of the two values

Definition at line 1692 of file math.hpp.

◆ extend_vabsdiff2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized abs_diff of the two values and the third value

Definition at line 1708 of file math.hpp.

◆ extend_vabsdiff2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized abs_diff of the two values with saturation

Definition at line 1722 of file math.hpp.

◆ extend_vabsdiff4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized abs_diff of the two values

Definition at line 1990 of file math.hpp.

◆ extend_vabsdiff4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized abs_diff of the two values and the third value

Definition at line 2006 of file math.hpp.

◆ extend_vabsdiff4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vabsdiff4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized abs_diff of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized abs_diff of the two values with saturation

Definition at line 2020 of file math.hpp.

◆ extend_vadd2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized addition of the two values

Definition at line 1604 of file math.hpp.

◆ extend_vadd2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized addition of the two values and the third value

Definition at line 1620 of file math.hpp.

◆ extend_vadd2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized addition of the two values with saturation

Definition at line 1634 of file math.hpp.

◆ extend_vadd4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized addition of the two values

Definition at line 1902 of file math.hpp.

◆ extend_vadd4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized addition of the two values and the third value

Definition at line 1918 of file math.hpp.

◆ extend_vadd4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vadd4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized addition of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized addition of the two values with saturation

Definition at line 1932 of file math.hpp.

◆ extend_vavrg2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized average of the two values

Definition at line 1824 of file math.hpp.

◆ extend_vavrg2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized average of the two values and the third value

Definition at line 1841 of file math.hpp.

◆ extend_vavrg2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized average of the two values with saturation

Definition at line 1855 of file math.hpp.

◆ extend_vavrg4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized average of the two values

Definition at line 2122 of file math.hpp.

◆ extend_vavrg4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized average of the two values and the third value

Definition at line 2139 of file math.hpp.

◆ extend_vavrg4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vavrg4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized average of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized average of the two values with saturation

Definition at line 2153 of file math.hpp.

◆ extend_vcompare2()

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned syclcompat::extend_vcompare2 ( AT  a,
BT  b,
BinaryOperation  cmp 
)
inlineconstexpr

Treat a and b as 2 elements vector types, extend each element to 17 bit, and perform a vectorized comparison of the input values using the specified comparison cmp.

Template Parameters
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
[in]BinaryOperation The type of the compare operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cmpThe comparison operator
Returns
The comparison result of the two extended values.

Definition at line 1870 of file math.hpp.

◆ extend_vcompare2_add()

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned syclcompat::extend_vcompare2_add ( AT  a,
BT  b,
unsigned  c,
BinaryOperation  cmp 
)
inlineconstexpr

Treat a and b as 2 elements vector types, extend each element to 17 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c.

Template Parameters
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
[in]BinaryOperation The type of the compare operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]cmpThe comparison operator
Returns
The comparison result of the two extended values, added to c.

Definition at line 1887 of file math.hpp.

◆ extend_vcompare4()

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned syclcompat::extend_vcompare4 ( AT  a,
BT  b,
BinaryOperation  cmp 
)
inlineconstexpr

Treat a and b as 4 elements vector types, extend each element to 9 bit, and perform a vectorized comparison of the input values using the specified comparison cmp.

Template Parameters
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
[in]BinaryOperation The type of the compare operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cmpThe comparison operator
Returns
The comparison result of the two extended values.

Definition at line 2168 of file math.hpp.

◆ extend_vcompare4_add()

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned syclcompat::extend_vcompare4_add ( AT  a,
BT  b,
unsigned  c,
BinaryOperation  cmp 
)
inlineconstexpr

Treat a and b as 4 elements vector types, extend each element to 9 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c.

Template Parameters
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
[in]BinaryOperation The type of the compare operation
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
[in]cmpThe comparison operator
Returns
The comparison result of the two extended values, added to c.

Definition at line 2185 of file math.hpp.

◆ extend_vmax2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized maximum of the two values

Definition at line 1780 of file math.hpp.

◆ extend_vmax2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized maximum of the two values and the third value

Definition at line 1796 of file math.hpp.

◆ extend_vmax2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized maximum of the two values with saturation

Definition at line 1810 of file math.hpp.

◆ extend_vmax4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized maximum of the two values

Definition at line 2078 of file math.hpp.

◆ extend_vmax4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized maximum of the two values and the third value

Definition at line 2094 of file math.hpp.

◆ extend_vmax4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmax4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized maximum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized maximum of the two values with saturation

Definition at line 2108 of file math.hpp.

◆ extend_vmin2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized minimum of the two values

Definition at line 1736 of file math.hpp.

◆ extend_vmin2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized minimum of the two values and the third value

Definition at line 1752 of file math.hpp.

◆ extend_vmin2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized minimum of the two values with saturation

Definition at line 1766 of file math.hpp.

◆ extend_vmin4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized minimum of the two values

Definition at line 2034 of file math.hpp.

◆ extend_vmin4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized minimum of the two values and the third value

Definition at line 2050 of file math.hpp.

◆ extend_vmin4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vmin4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized minimum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized minimum of the two values with saturation

Definition at line 2064 of file math.hpp.

◆ extend_vsub2()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub2 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized subtraction of the two values

Definition at line 1648 of file math.hpp.

◆ extend_vsub2_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub2_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized subtraction of the two values and the third value

Definition at line 1664 of file math.hpp.

◆ extend_vsub2_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub2_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized subtraction of the two values with saturation

Definition at line 1678 of file math.hpp.

◆ extend_vsub4()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub4 ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized subtraction of the two values

Definition at line 1946 of file math.hpp.

◆ extend_vsub4_add()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub4_add ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The addition of each half of extend vectorized subtraction of the two values and the third value

Definition at line 1962 of file math.hpp.

◆ extend_vsub4_sat()

template<typename RetT , typename AT , typename BT >
constexpr RetT syclcompat::extend_vsub4_sat ( AT  a,
BT  b,
RetT  c 
)
inlineconstexpr

Compute vectorized subtraction of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters
[in]RetT The type of the return value, can only be 32 bit integer
[in]AT The type of the first value, can only be 32 bit integer
[in]BT The type of the second value, can only be 32 bit integer
Parameters
[in]aThe first value
[in]bThe second value
[in]cThe third value
Returns
The extend vectorized subtraction of the two values with saturation

Definition at line 1976 of file math.hpp.

◆ fast_length()

float syclcompat::fast_length ( const float *  a,
int  len 
)
inline

Compute fast_length for variable-length array.

Parameters
[in]aThe array
[in]lenLength of the array
Returns
The computed fast_length

Definition at line 411 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

◆ ffs()

template<typename T >
int syclcompat::ffs ( T  a)
inline

Find position of first least significant set bit in an integer.

ffs(0) returns 0.

Parameters
[in]aInput integer value
Returns
The position

Definition at line 198 of file util.hpp.

◆ fill()

template<class T >
static void syclcompat::fill ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously sets pattern to the first count elements starting from dev_ptr.

The function will return after the fill operation is completed.

Template Parameters
TDatatype of the value to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 820 of file memory.hpp.

References syclcompat::detail::fill(), and sycl::_V1::event::wait().

◆ fill_async()

template<class T >
static sycl::event syclcompat::fill_async ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously sets pattern to the first count elements starting from dev_ptr.

The return of the function does NOT guarantee the fill operation is completed.

Template Parameters
TDatatype of the pattern to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
An event representing the fill operation.

Definition at line 837 of file memory.hpp.

References syclcompat::detail::fill().

◆ filter_device()

static void syclcompat::filter_device ( const std::vector< std::string > &  dev_subnames)
inlinestatic

Filter out devices; only keep the devices whose names contain one of the subnames in dev_subnames.

May break device id mapping and change current device. It's better to be called before other SYCLcompat or SYCL APIs.

Definition at line 916 of file device.hpp.

References syclcompat::detail::dev_mgr::filter(), and syclcompat::detail::dev_mgr::instance().

◆ fmax_nan() [1/2]

template<typename ValueT , typename ValueU >
sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmax_nan ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueU, 2 >  b 
)
inline

Definition at line 752 of file math.hpp.

References fmax_nan().

◆ fmax_nan() [2/2]

template<typename ValueT , typename ValueU >
std::common_type_t<ValueT, ValueU> syclcompat::fmax_nan ( const ValueT  a,
const ValueU  b 
)
inline

Compares two elements and returns the larger one.

If either of inputs is NaN, then return NaN.

Parameters
[in]aThe first value
[in]bThe second value
Returns
the bigger value

Definition at line 743 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmax_nan().

◆ fmin_nan() [1/2]

template<typename ValueT , typename ValueU >
sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmin_nan ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueU, 2 >  b 
)
inline

Definition at line 771 of file math.hpp.

References fmin_nan().

◆ fmin_nan() [2/2]

template<typename ValueT , typename ValueU >
std::common_type_t<ValueT, ValueU> syclcompat::fmin_nan ( const ValueT  a,
const ValueU  b 
)
inline

Compares two elements and returns the smaller one.

If either of inputs is NaN, then return NaN.

Parameters
[in]aThe first value
[in]bThe second value
Returns
the smaller value

Definition at line 762 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmin_nan().

◆ free()

static void syclcompat::free ( void *  ptr,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Free the memory ptr on the queue q (the default queue unless another queue is passed) without synchronizing.

Parameters
ptrPointer to the memory to free.
Returns
no return value.

Definition at line 633 of file memory.hpp.

Referenced by syclcompat::device_memory< T, Memory, Dimension >::~device_memory(), and syclcompat::detail::host_buffer::~host_buffer().

◆ funnelshift_l()

unsigned int syclcompat::funnelshift_l ( unsigned int  low,
unsigned int  high,
unsigned int  shift 
)
inline

Emulated function for __funnelshift_l.

Definition at line 384 of file math.hpp.

◆ funnelshift_lc()

unsigned int syclcompat::funnelshift_lc ( unsigned int  low,
unsigned int  high,
unsigned int  shift 
)
inline

Emulated function for __funnelshift_lc.

Definition at line 390 of file math.hpp.

◆ funnelshift_r()

unsigned int syclcompat::funnelshift_r ( unsigned int  low,
unsigned int  high,
unsigned int  shift 
)
inline

Emulated function for __funnelshift_r.

Definition at line 396 of file math.hpp.

◆ funnelshift_rc()

unsigned int syclcompat::funnelshift_rc ( unsigned int  low,
unsigned int  high,
unsigned int  shift 
)
inline

Emulated function for __funnelshift_rc.

Definition at line 402 of file math.hpp.

◆ get_current_device()

static device_ext& syclcompat::get_current_device ( )
inlinestatic

◆ get_current_device_id()

static unsigned int syclcompat::get_current_device_id ( )
inlinestatic

Util function to get the id of current device in device manager.

Definition at line 887 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::instance().

◆ get_default_context()

static sycl::context syclcompat::get_default_context ( )
inlinestatic

Util function to get the context of the default queue of current device in device manager.

Definition at line 903 of file device.hpp.

References syclcompat::device_ext::get_context(), and get_current_device().

◆ get_default_queue()

static sycl::queue syclcompat::get_default_queue ( )
inlinestatic

Util function to get the default queue of current device in device manager.

Definition at line 864 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::device_ext::default_queue(), and syclcompat::detail::dev_mgr::instance().

Referenced by launch(), and syclcompat::experimental::launch().

◆ get_device()

◆ get_device_id()

static unsigned int syclcompat::get_device_id ( const sycl::device dev)
inlinestatic

◆ get_kernel_function()

static kernel_function syclcompat::get_kernel_function ( kernel_library library,
const std::string &  name 
)
inlinestatic

Find kernel function in a kernel library and return its address.

Parameters
[in]libraryHandle to the kernel library.
[in]nameName of the kernel function.

Definition at line 435 of file kernel.hpp.

◆ get_kernel_function_info() [1/2]

static kernel_function_info syclcompat::get_kernel_function_info ( const void *  function)
inlinestatic

◆ get_kernel_function_info() [2/2]

static void syclcompat::get_kernel_function_info ( kernel_function_info kernel_info,
const void *  function 
)
inlinestatic

◆ get_major_version()

static int syclcompat::get_major_version ( const sycl::device dev)
static

Definition at line 325 of file device.hpp.

References syclcompat::detail::get_version().

Referenced by syclcompat::device_ext::get_major_version().

◆ get_minor_version()

static int syclcompat::get_minor_version ( const sycl::device dev)
static

Definition at line 331 of file device.hpp.

References syclcompat::detail::get_version().

Referenced by syclcompat::device_ext::get_minor_version().

◆ get_sycl_language_version()

int syclcompat::get_sycl_language_version ( )
inline

Inherited from the original SYCLomatic compatibility headers.

Returns
compiler's SYCL version if defined, 202000 otherwise.

Definition at line 502 of file util.hpp.

◆ int_as_queue_ptr()

queue_ptr syclcompat::int_as_queue_ptr ( uintptr_t  x)
inline

If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr.

Definition at line 924 of file util.hpp.

◆ invoke_kernel_function()

static void syclcompat::invoke_kernel_function ( kernel_function function,
sycl::queue queue,
sycl::range< 3 >  group_range,
sycl::range< 3 >  local_range,
unsigned int  local_mem_size,
void **  kernel_params,
void **  extra 
)
inlinestatic

Invoke a kernel function.

Parameters
[in]functionkernel function.
[in]queueSYCL queue used to execute kernel
[in]group_rangeSYCL group range
[in]local_rangeSYCL local range
[in]local_mem_sizeThe size of local memory required by the kernel function.
[in]kernel_paramsArray of pointers to kernel arguments.
[in]extraExtra arguments.

Definition at line 459 of file kernel.hpp.

◆ isnan()

template<typename ValueT >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::isnan ( const ValueT  a)
inline

Determine whether a 2-element value is NaN.

Parameters
[in]aThe input value
Returns
the comparison result

Definition at line 683 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by syclcompat::detail::isnan().

◆ launch() [1/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
Args...  args 
)

Definition at line 136 of file launch.hpp.

References get_default_queue().

◆ launch() [2/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 218 of file launch.hpp.

References get_default_queue().

◆ launch() [3/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 196 of file launch.hpp.

◆ launch() [4/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
sycl::queue  q,
Args...  args 
)

Definition at line 130 of file launch.hpp.

◆ launch() [5/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
Args...  args 
)

Definition at line 123 of file launch.hpp.

References get_default_queue().

◆ launch() [6/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 175 of file launch.hpp.

References get_default_queue().

◆ launch() [7/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
qThe SYCL queue on which to execute the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 155 of file launch.hpp.

◆ launch() [8/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
sycl::queue  q,
Args...  args 
)

Definition at line 117 of file launch.hpp.

◆ length()

template<typename ValueT >
ValueT syclcompat::length ( const ValueT *  a,
const int  len 
)
inline

Calculate the Euclidean length of the input array, i.e. the square root of the sum of the squares of its elements.

Parameters
[in]aThe array pointer
[in]lenLength of the array
Returns
The square root

Definition at line 436 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

Referenced by syclcompat::device_info::set_name(), and sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::writeRegion().

◆ list_devices()

static void syclcompat::list_devices ( )
inlinestatic

List all the devices with their ids in dev_mgr.

Definition at line 921 of file device.hpp.

References syclcompat::detail::dev_mgr::instance(), and syclcompat::detail::dev_mgr::list_devices().

◆ load_kernel_library()

static kernel_library syclcompat::load_kernel_library ( const std::string &  name)
inlinestatic

Load kernel library and return a handle to use the library.

Parameters
[in]nameThe name of the library.

Definition at line 386 of file kernel.hpp.

References syclcompat::detail::load_dl_from_data().

◆ load_kernel_library_mem()

static kernel_library syclcompat::load_kernel_library_mem ( char const *const  image)
inlinestatic

Load a kernel library whose image is already in memory and return a handle to use the library.

Parameters
[in]imageA pointer to the image in memory.

Definition at line 400 of file kernel.hpp.

References syclcompat::detail::get_lib_size(), and syclcompat::detail::load_dl_from_data().

◆ local_mem()

template<typename AllocT >
auto* syclcompat::local_mem ( )

Definition at line 71 of file memory.hpp.

Referenced by syclcompat::detail::launch().

◆ malloc() [1/3]

static void* syclcompat::malloc ( size_t &  pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 2D array on the device.

Parameters
[out]pitchAligned size of x in bytes.
xRange in dim x.
yRange in dim y.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 612 of file memory.hpp.

References syclcompat::detail::malloc().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::device_memory().

◆ malloc() [2/3]

static void* syclcompat::malloc ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the device.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
countNumber of elements to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 538 of file memory.hpp.

References syclcompat::detail::malloc().

◆ malloc() [3/3]

static pitched_data syclcompat::malloc ( sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 3D array on the device.

Parameters
sizeSize of the memory block, in bytes.
qQueue to execute the allocate task.
Returns
A pitched_data object which stores the memory info.

Definition at line 596 of file memory.hpp.

References sycl::_V1::detail::array< dimensions >::get(), syclcompat::detail::malloc(), syclcompat::pitched_data::set_data_ptr(), and syclcompat::pitched_data::set_pitch().

◆ malloc_host()

static void* syclcompat::malloc_host ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the host.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 557 of file memory.hpp.

◆ malloc_shared()

static void* syclcompat::malloc_shared ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block of usm_shared memory.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 577 of file memory.hpp.

◆ match_all_over_sub_group()

template<typename T >
unsigned int syclcompat::match_all_over_sub_group ( sycl::sub_group  g,
unsigned  member_mask,
T  value,
int *  pred 
)

The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_all_over_sub_group returns member_mask and sets the predicate pred to 1 if the values provided by all work-items in member_mask are equal; otherwise it returns 0 and sets the predicate pred to 0. The n-th bit of member_mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]member_maskInput mask
[in]valueInput value
[out]predOutput predicate
Returns
The result

Definition at line 561 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ match_any_over_sub_group()

template<typename T >
unsigned int syclcompat::match_any_over_sub_group ( sycl::sub_group  g,
unsigned  member_mask,
T  value 
)

The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_any_over_sub_group returns a mask in which some bits are set to 1, indicating that the values provided by the work-items represented by these bits are equal. The n-th bit of the mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]member_maskInput mask
[in]valueInput value
Returns
The result

Definition at line 522 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ max() [1/3]

◆ max() [2/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max ( ValueT  a,
ValueU  b 
)
inline

Definition at line 723 of file math.hpp.

◆ max() [3/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max ( ValueT  a,
ValueU  b 
)
inline

Definition at line 731 of file math.hpp.

◆ memcpy() [1/4]

static void syclcompat::memcpy ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 783 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy() [2/4]

template<typename T >
static void syclcompat::memcpy ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 720 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::event::wait().

◆ memcpy() [3/4]

static void syclcompat::memcpy ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 672 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::event::wait().

◆ memcpy() [4/4]

static void syclcompat::memcpy ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 742 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy_async() [1/4]

static sycl::event syclcompat::memcpy_async ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size. The return of the function does NOT guarantee the copy is completed.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 802 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memcpy().

◆ memcpy_async() [2/4]

template<typename T >
static sycl::event syclcompat::memcpy_async ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 703 of file memory.hpp.

References syclcompat::detail::memcpy().

◆ memcpy_async() [3/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 686 of file memory.hpp.

References syclcompat::detail::memcpy().

◆ memcpy_async() [4/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
An event representing the memcpy operation.

Definition at line 763 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memcpy().

◆ memset() [1/3]

static void syclcompat::memset ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets value to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to be set. The function will return after the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to be set.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 1039 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset() [2/3]

static void syclcompat::memset ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously sets value to the first size bytes starting from dev_ptr.

The function will return after the memset operation is completed.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 876 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::event::wait().

◆ memset() [3/3]

static void syclcompat::memset ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 948 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_async() [1/3]

static sycl::event syclcompat::memset_async ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets value to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to be set. The return of the function does NOT guarantee the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to be set.
qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 1053 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_async() [2/3]

static sycl::event syclcompat::memset_async ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
Returns
An event representing the memset operation.

Definition at line 909 of file memory.hpp.

References syclcompat::detail::memset().

◆ memset_async() [3/3]

static sycl::event syclcompat::memset_async ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 990 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_d16() [1/2]

static void syclcompat::memset_d16 ( void *  dev_ptr,
unsigned short  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.

Definition at line 887 of file memory.hpp.

◆ memset_d16() [2/2]

static void syclcompat::memset_d16 ( void *  ptr,
size_t  pitch,
unsigned short  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 961 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d16_async() [1/2]

static sycl::event syclcompat::memset_d16_async ( void *  dev_ptr,
unsigned short  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 922 of file memory.hpp.

◆ memset_d16_async() [2/2]

static sycl::event syclcompat::memset_d16_async ( void *  ptr,
size_t  pitch,
unsigned short  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 1008 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memset().

◆ memset_d32() [1/2]

static void syclcompat::memset_d32 ( void *  dev_ptr,
unsigned int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.

Definition at line 898 of file memory.hpp.

◆ memset_d32() [2/2]

static void syclcompat::memset_d32 ( void *  ptr,
size_t  pitch,
unsigned int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.

Definition at line 975 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d32_async() [1/2]

static sycl::event syclcompat::memset_d32_async ( void *  dev_ptr,
unsigned int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters
[in]dev_ptrPointer to the virtual device memory address.
[in]valueThe value to be set.
[in]sizeNumber of elements to be set to the value.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 935 of file memory.hpp.

◆ memset_d32_async() [2/2]

static sycl::event syclcompat::memset_d32_async ( void *  ptr,
size_t  pitch,
unsigned int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters
[in]ptrPointer to the virtual device memory.
[in]pitchThe pitch size by number of elements, including padding.
[in]valThe value to be set.
[in]xThe width of memory region by number of elements.
[in]yThe height of memory region by number of elements.
[in]qThe queue in which the operation is done.
Returns
An event representing the memset operation.

Definition at line 1024 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memset().

◆ min() [1/3]

◆ min() [2/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min ( ValueT  a,
ValueU  b 
)
inline

Definition at line 705 of file math.hpp.

◆ min() [3/3]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min ( ValueT  a,
ValueU  b 
)
inline

Definition at line 713 of file math.hpp.

◆ operator*()

dim3 syclcompat::operator* ( const dim3 a,
const dim3 b 
)
inline

Definition at line 60 of file dims.hpp.

◆ operator+()

dim3 syclcompat::operator+ ( const dim3 a,
const dim3 b 
)
inline

Definition at line 64 of file dims.hpp.

◆ operator-()

dim3 syclcompat::operator- ( const dim3 a,
const dim3 b 
)
inline

Definition at line 68 of file dims.hpp.

◆ permute_sub_group_by_xor()

template<typename T >
T syclcompat::permute_sub_group_by_xor ( sycl::sub_group  g,
T  x,
unsigned int  mask,
int  logical_sub_group_size = 32 
)

permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is bitwise exclusive OR of the caller's id and mask. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]maskInput mask
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 298 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ pow() [1/4]

double syclcompat::pow ( const double  a,
const int  b 
)
inline

Definition at line 777 of file math.hpp.

◆ pow() [2/4]

float syclcompat::pow ( const float  a,
const int  b 
)
inline

Definition at line 776 of file math.hpp.

◆ pow() [3/4]

template<typename ValueT , typename ValueU >
std::enable_if_t<std::is_floating_point_v<ValueT>, ValueT> syclcompat::pow ( const ValueT  a,
const ValueU  b 
)
inline

Definition at line 781 of file math.hpp.

References pow().

◆ pow() [4/4]

template<typename ValueT , typename ValueU >
std::enable_if_t<!std::is_floating_point_v<ValueT>, double> syclcompat::pow ( const ValueT  a,
const ValueU  b 
)
inline

Definition at line 790 of file math.hpp.

Referenced by pow().

◆ relu() [1/3]

template<class ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::marray<ValueT, 2> > syclcompat::relu ( const sycl::marray< ValueT, 2 >  a)
inline

Definition at line 817 of file math.hpp.

References relu().

◆ relu() [2/3]

template<class ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::vec<ValueT, 2> > syclcompat::relu ( const sycl::vec< ValueT, 2 >  a)
inline

Definition at line 810 of file math.hpp.

References relu().

◆ relu() [3/3]

template<typename ValueT >
std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::relu ( const ValueT  a)
inline

Performs relu saturation.

Parameters
[in] a  The input value
Returns
the relu saturation result

Definition at line 801 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by relu().

◆ reverse_bits()

template<typename T >
T syclcompat::reverse_bits ( T  a )
inline

Reverse the bit order of an unsigned integer.

Parameters
[in] a  Input unsigned integer value
Returns
Value of a with the bit order reversed

Definition at line 161 of file util.hpp.

◆ select_device() [1/2]

template<class DeviceSelector >
static std::enable_if_t< std::is_invocable_r_v<int, DeviceSelector, const sycl::device &> > syclcompat::select_device ( const DeviceSelector &  selector = sycl::gpu_selector_v)
inline static

◆ select_device() [2/2]

static unsigned int syclcompat::select_device ( unsigned int  id)
inline static

◆ select_from_sub_group()

template<typename T >
T syclcompat::select_from_sub_group ( sycl::sub_group  g,
T  x,
int  remote_local_id,
int  logical_sub_group_size = 32 
)

select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from the work-item whose id is remote_local_id. If remote_local_id is outside the logical sub_group id range, it is taken modulo logical_sub_group_size. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
T  Input value type
Parameters
[in] g  Input sub_group
[in] x  Input value
[in] remote_local_id  Input source work item id
[in] logical_sub_group_size  Input logical sub_group size
Returns
The result

Definition at line 218 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ set_default_queue()

static void syclcompat::set_default_queue ( const sycl::queue q)
inline static

Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well.

This function will be blocking if there are submitted kernels in the previous default queue.

Parameters
q  New user-defined queue

Definition at line 875 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::device_ext::set_default_queue().

◆ shift_sub_group_left()

template<typename T >
T syclcompat::shift_sub_group_left ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from the work-item whose id is the caller's id plus delta. If the calculated id is outside the logical sub_group id range, the work-item will get the value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
T  Input value type
Parameters
[in] g  Input sub_group
[in] x  Input value
[in] delta  Input delta
[in] logical_sub_group_size  Input logical sub_group size
Returns
The result

Definition at line 242 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ shift_sub_group_right()

template<typename T >
T syclcompat::shift_sub_group_right ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in a logical sub_group gets the value from the work-item whose id is the caller's id minus delta. If the calculated id is outside the logical sub_group id range, the work-item will get the value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
T  Input value type
Parameters
[in] g  Input sub_group
[in] x  Input value
[in] delta  Input delta
[in] logical_sub_group_size  Input logical sub_group size
Returns
The result

Definition at line 270 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ unload_kernel_library()

static void syclcompat::unload_kernel_library ( const kernel_library library)
inline static

Unload kernel library.

Parameters
[in,out] library  Handle to the library to be closed.

Definition at line 408 of file kernel.hpp.

◆ unordered_compare() [1/2]

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::unordered_compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs unordered comparison.

Parameters
[in] a  The first value
[in] b  The second value
[in] binary_op  functor that implements the binary operation
Returns
the comparison result

Definition at line 495 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by unordered_compare(), unordered_compare_both(), and unordered_compare_mask().

◆ unordered_compare() [2/2]

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::unordered_compare ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element unordered comparison.

Parameters
[in] a  The first value
[in] b  The second value
[in] binary_op  functor that implements the binary operation
Returns
the comparison result

Definition at line 507 of file math.hpp.

References unordered_compare().

◆ unordered_compare_both()

template<typename ValueT , class BinaryOperation >
std::enable_if_t<ValueT::size() == 2, bool> syclcompat::unordered_compare_both ( const ValueT  a,
const ValueT  b,
const BinaryOperation  binary_op 
)
inline

Performs 2 element unordered comparison and returns true if both results are true.

Parameters
[in] a  The first value
[in] b  The second value
[in] binary_op  functor that implements the binary operation
Returns
the comparison result

Definition at line 532 of file math.hpp.

References unordered_compare().

◆ unordered_compare_mask()

template<typename ValueT , class BinaryOperation >
unsigned syclcompat::unordered_compare_mask ( const sycl::vec< ValueT, 2 >  a,
const sycl::vec< ValueT, 2 >  b,
const BinaryOperation  binary_op 
)
inline

Performs a 2-element unordered comparison. The comparison result of each element is 0 (false) or 0xffff (true); the function returns an unsigned int composed of the comparison results of the two elements.

Parameters
[in] a  The first value
[in] b  The second value
[in] binary_op  functor that implements the binary operation
Returns
the comparison result

Definition at line 562 of file math.hpp.

References unordered_compare().

◆ vectorized_binary()

template<typename VecT , class BinaryOperation >
unsigned syclcompat::vectorized_binary ( unsigned  a,
unsigned  b,
const BinaryOperation  binary_op 
)
inline

Compute vectorized binary operation value for two values, with each value treated as a vector type VecT.

Template Parameters
[in] VecT  The type of the vector
[in] BinaryOperation  The binary operation class
Parameters
[in] a  The first value
[in] b  The second value
Returns
The vectorized binary operation value of the two values

Definition at line 984 of file math.hpp.

◆ vectorized_isgreater()

template<typename S , typename T >
T syclcompat::vectorized_isgreater ( T  a,
T  b 
)
inline

Compute vectorized isgreater for two values, with each value treated as a vector type S.

Parameters
[in] S  The type of the vector
[in] T  The type of the original values
[in] a  The first value
[in] b  The second value
Returns
The vectorized greater than of the two values

Definition at line 642 of file math.hpp.

◆ vectorized_isgreater< sycl::ushort2, unsigned >()

template<>
unsigned syclcompat::vectorized_isgreater< sycl::ushort2, unsigned > ( unsigned  a,
unsigned  b 
)
inline

Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short.

Parameters
[in] a  The first value
[in] b  The second value
Returns
The vectorized greater than of the two values

Definition at line 657 of file math.hpp.

◆ vectorized_max()

template<typename S , typename T >
T syclcompat::vectorized_max ( T  a,
T  b 
)
inline

Compute vectorized max for two values, with each value treated as a vector type S.

Parameters
[in] S  The type of the vector
[in] T  The type of the original values
[in] a  The first value
[in] b  The second value
Returns
The vectorized max of the two values

Definition at line 576 of file math.hpp.

◆ vectorized_min()

template<typename S , typename T >
T syclcompat::vectorized_min ( T  a,
T  b 
)
inline

Compute vectorized min for two values, with each value treated as a vector type S.

Parameters
[in] S  The type of the vector
[in] T  The type of the original values
[in] a  The first value
[in] b  The second value
Returns
The vectorized min of the two values

Definition at line 592 of file math.hpp.

◆ vectorized_sum_abs_diff()

template<typename VecT >
unsigned syclcompat::vectorized_sum_abs_diff ( unsigned  a,
unsigned  b 
)
inline

Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type VecT.

Template Parameters
[in] VecT  The type of the vector
Parameters
[in] a  The first value
[in] b  The second value
Returns
The vectorized absolute difference of the two values

Definition at line 623 of file math.hpp.

References sycl::_V1::ext::intel::esimd::detail::sum().

◆ vectorized_unary()

template<typename VecT , class UnaryOperation >
unsigned syclcompat::vectorized_unary ( unsigned  a,
const UnaryOperation  unary_op 
)
inline

Compute vectorized unary operation for a value, with the value treated as a vector type VecT.

Template Parameters
[in] VecT  The type of the vector
[in] UnaryOperation  The unary operation class
Parameters
[in] a  The input value
Returns
The vectorized unary operation value of the input value

Definition at line 608 of file math.hpp.

◆ wait()

static void syclcompat::wait ( sycl::queue  q = get_default_queue())
inline static

Definition at line 879 of file device.hpp.

Referenced by syclcompat::device_ext::~device_ext().

◆ wait_and_free()

static void syclcompat::wait_and_free ( void *  ptr,
sycl::queue  q = get_default_queue() 
)
inline static

Wait on the queue q and free the memory ptr.

Parameters
ptr  Pointer to the memory to free.
q  Queue to execute the free task.
Returns
no return value.

Definition at line 621 of file memory.hpp.

References get_current_device(), syclcompat::device_ext::queues_wait_and_throw(), and sycl::_V1::queue::wait().

◆ wait_and_throw()

static void syclcompat::wait_and_throw ( sycl::queue  q = get_default_queue())
inline static

Definition at line 881 of file device.hpp.

◆ wg_barrier()

void syclcompat::wg_barrier ( )
inline

Definition at line 32 of file id_query.hpp.