Namespaces
	detail
	Atomic extension to implement standard APIs in std::atomic.

	experimental

	global_id

	global_range

	local_id

	local_range

	work_group_id

	work_group_range

Classes
class	atomic

class	device_info

class	device_ext
	device extension More...

class	dim3

struct	kernel_function_info

class	kernel_library

class	kernel_function

struct	abs
	A sycl::abs wrapper functor. More...

struct	abs_diff
	A sycl::abs_diff wrapper functor. More...

struct	add_sat
	A sycl::add_sat wrapper functor. More...

struct	rhadd
	A sycl::rhadd wrapper functor. More...

struct	hadd
	A sycl::hadd wrapper functor. More...

struct	maximum
	A sycl::max wrapper functor. More...

struct	minimum
	A sycl::min wrapper functor. More...

struct	sub_sat
	A sycl::sub_sat wrapper functor. More...

class	pitched_data
	Pitched 2D/3D memory data. More...

class	accessor
	accessor used as device function parameter. More...

class	accessor< T, Memory, 3 >

class	accessor< T, Memory, 2 >

class	device_memory
	Device variable with address space of shared or global. More...

class	device_memory< T, Memory, 0 >

class	pointer_attributes

struct	type_identity

struct	arith

class	args_selector

class	args_selector< n_nondefault_params, n_default_params, R(Ts...)>
	args_selector is a helper class for extracting arguments from an array of pointers to arguments or buffer of arguments to pass to a kernel function. More...

Typedefs
using	event_ptr = sycl::event *

using	queue_ptr = sycl::queue *

using	device_ptr = char *

typedef void(*	kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)

template<typename T1 , typename T2 >
using	dot_product_acc_t = std::conditional_t< std::is_unsigned_v< T1 > &&std::is_unsigned_v< T2 >, uint32_t, int32_t >

using	byte_t = uint8_t

template<class T , size_t Dimension>
using	global_memory = device_memory< T, memory_region::global, Dimension >

template<class T , size_t Dimension>
using	constant_memory = device_memory< T, memory_region::constant, Dimension >

template<class T , size_t Dimension>
using	shared_memory = device_memory< T, memory_region::usm_shared, Dimension >

template<class T >
using	type_identity_t = typename type_identity< T >::type

template<typename T >
using	arith_t = typename arith< T >::type

using	err0 = detail::generic_error_type< struct err0_tag, int >

using	err1 = detail::generic_error_type< struct err1_tag, int >

Enumerations
enum	error_code { SUCCESS = 0 , BACKEND_ERROR = 1 , DEFAULT_ERROR = 999 }

enum class	memory_region { global = 0 , constant , local , usm_shared }

enum class	target { device , local }

Functions
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_add (T *addr, arith_t< T > operand)
	Atomically add the value operand to the value at the addr and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_sub (T *addr, arith_t< T > operand)
	Atomically subtract the value operand from the value at the addr and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_and (T *addr, type_identity_t< T > operand)
	Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_or (T *addr, type_identity_t< T > operand)
	Atomically or the value at the addr with the value operand, and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_xor (T *addr, type_identity_t< T > operand)
	Atomically xor the value at the addr with the value operand, and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_min (T *addr, type_identity_t< T > operand)
	Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_fetch_max (T *addr, type_identity_t< T > operand)
	Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int	atomic_fetch_compare_dec (unsigned int *addr, unsigned int operand)
	Atomically store `operand` into `addr` if the old value stored in `addr` is equal to zero or greater than `operand`; otherwise decrement the value stored in `addr`. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int	atomic_fetch_compare_inc (unsigned int *addr, unsigned int operand)
	Atomically increment the value stored in `addr` if the old value stored in `addr` is less than `operand`; otherwise set the value stored in `addr` to 0. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_exchange (T *addr, type_identity_t< T > operand)
	Atomically exchange the value at the address addr with the value operand. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_compare_exchange_strong (sycl::multi_ptr< T, addressSpace > addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
	Atomically compare the value at `addr` to the value expected and exchange with the value desired if the value at `addr` is equal to the value expected. More...

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T	atomic_compare_exchange_strong (T *addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
	Atomically compare the value at `addr` to the value expected and exchange with the value desired if the value at `addr` is equal to the value expected. More...

static void	destroy_event (event_ptr event)
	Destroy `event` pointed memory. More...

static int	get_major_version (const sycl::device &dev)

static int	get_minor_version (const sycl::device &dev)

static sycl::queue	create_queue (bool print_on_async_exceptions=false, bool in_order=true)

static sycl::queue	get_default_queue ()
	Util function to get the default queue of current device in device manager. More...

static void	set_default_queue (const sycl::queue &q)
	Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well. More...

static void	wait (sycl::queue q=get_default_queue())

static void	wait_and_throw (sycl::queue q=get_default_queue())

static unsigned int	get_current_device_id ()
	Util function to get the id of current device in device manager. More...

static device_ext &	get_current_device ()
	Util function to get the current device. More...

static device_ext &	get_device (unsigned int id)
	Util function to get a device by id. More...

static sycl::context	get_default_context ()
	Util function to get the context of the default queue of current device in device manager. More...

static device_ext &	cpu_device ()
	Util function to get a CPU device. More...

static void	filter_device (const std::vector< std::string > &dev_subnames)
	Filter out devices; only keep the devices whose name contains one of the subnames in `dev_subnames`. More...

static void	list_devices ()
	List all the devices with their ids in dev_mgr. More...

static unsigned int	select_device (unsigned int id)

template<class DeviceSelector >
static std::enable_if_t< std::is_invocable_r_v< int, DeviceSelector, const sycl::device & > >	select_device (const DeviceSelector &selector=sycl::gpu_selector_v)

static unsigned int	get_device_id (const sycl::device &dev)

static unsigned int	device_count ()

dim3	operator* (const dim3 &a, const dim3 &b)

dim3	operator+ (const dim3 &a, const dim3 &b)

dim3	operator- (const dim3 &a, const dim3 &b)

void	wg_barrier ()

static void	get_kernel_function_info (kernel_function_info *kernel_info, const void *function)

static kernel_function_info	get_kernel_function_info (const void *function)

static kernel_library	load_kernel_library (const std::string &name)
	Load kernel library and return a handle to use the library. More...

static kernel_library	load_kernel_library_mem (char const *const image)
	Load kernel library whose image is already in memory and return a handle to use the library. More...

static void	unload_kernel_library (const kernel_library &library)
	Unload kernel library. More...

static kernel_function	get_kernel_function (kernel_library &library, const std::string &name)
	Find kernel function in a kernel library and return its address. More...

static void	invoke_kernel_function (kernel_function &function, sycl::queue &queue, sycl::range< 3 > group_range, sycl::range< 3 > local_range, unsigned int local_mem_size, void **kernel_params, void **extra)
	Invoke a kernel function. More...

template<int Dim>
sycl::nd_range< Dim >	compute_nd_range (sycl::range< Dim > global_size_in, sycl::range< Dim > work_group_size)

sycl::nd_range< 1 >	compute_nd_range (int global_size_in, int work_group_size)

template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event >	launch (const sycl::nd_range< Dim > &range, sycl::queue q, Args... args)

template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event >	launch (const sycl::nd_range< Dim > &range, Args... args)

template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event >	launch (const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args)

template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event >	launch (const dim3 &grid, const dim3 &threads, Args... args)

template<auto F, int Dim, typename... Args>
sycl::event	launch (const sycl::nd_range< Dim > &range, size_t mem_size, sycl::queue q, Args... args)
	Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue. More...

template<auto F, int Dim, typename... Args>
sycl::event	launch (const sycl::nd_range< Dim > &range, size_t mem_size, Args... args)
	Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue. More...

template<auto F, typename... Args>
sycl::event	launch (const dim3 &grid, const dim3 &threads, size_t mem_size, sycl::queue q, Args... args)
	Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue. More...

template<auto F, typename... Args>
sycl::event	launch (const dim3 &grid, const dim3 &threads, size_t mem_size, Args... args)
	Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue. More...

template<typename T >
T	bfe_safe (const T source, const uint32_t bit_start, const uint32_t num_bits)
	Bitfield-extract with boundary checking. More...

template<typename T >
T	bfi_safe (const T x, const T y, const uint32_t bit_start, const uint32_t num_bits)
	Bitfield-insert with boundary checking. More...

unsigned int	funnelshift_l (unsigned int low, unsigned int high, unsigned int shift)
	Emulated function for __funnelshift_l. More...

unsigned int	funnelshift_lc (unsigned int low, unsigned int high, unsigned int shift)
	Emulated function for __funnelshift_lc. More...

unsigned int	funnelshift_r (unsigned int low, unsigned int high, unsigned int shift)
	Emulated function for __funnelshift_r. More...

unsigned int	funnelshift_rc (unsigned int low, unsigned int high, unsigned int shift)
	Emulated function for __funnelshift_rc. More...

float	fast_length (const float *a, int len)
	Compute fast_length for variable-length array. More...

template<typename ValueT >
ValueT	length (const ValueT *a, const int len)
	Calculate the square root of the input array. More...

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool >	compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs comparison. More...

template<typename ValueT >
std::enable_if_t< std::is_same_v< std::invoke_result_t< std::not_equal_to<>, ValueT, ValueT >, bool >, bool >	compare (const ValueT a, const ValueT b, const std::not_equal_to<> binary_op)

template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT >	compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs 2 element comparison. More...

template<typename ValueT , class BinaryOperation >
std::enable_if_t< std::is_same_v< std::invoke_result_t< BinaryOperation, ValueT, ValueT >, bool >, bool >	unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs unordered comparison. More...

template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, ValueT >	unordered_compare (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs 2 element unordered comparison. More...

template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool >	compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs 2 element comparison and return true if both results are true. More...

template<typename ValueT , class BinaryOperation >
std::enable_if_t< ValueT::size()==2, bool >	unordered_compare_both (const ValueT a, const ValueT b, const BinaryOperation binary_op)
	Performs 2 element unordered comparison and return true if both results are true. More...

template<typename ValueT , class BinaryOperation >
unsigned	compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
	Performs 2 elements comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...

template<typename ValueT , class BinaryOperation >
unsigned	unordered_compare_mask (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const BinaryOperation binary_op)
	Performs 2 elements unordered comparison, compare result of each element is 0 (false) or 0xffff (true), returns an unsigned int by composing compare result of two elements. More...

template<typename S , typename T >
T	vectorized_max (T a, T b)
	Compute vectorized max for two values, with each value treated as a vector type `S`. More...

template<typename S , typename T >
T	vectorized_min (T a, T b)
	Compute vectorized min for two values, with each value treated as a vector type `S`. More...

template<typename VecT , class UnaryOperation >
unsigned	vectorized_unary (unsigned a, const UnaryOperation unary_op)
	Compute vectorized unary operation for a value, with the value treated as a vector type `VecT`. More...

template<typename VecT >
unsigned	vectorized_sum_abs_diff (unsigned a, unsigned b)
	Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type `VecT`. More...

template<typename S , typename T >
T	vectorized_isgreater (T a, T b)
	Compute vectorized isgreater for two values, with each value treated as a vector type `S`. More...

template<>
unsigned	vectorized_isgreater< sycl::ushort2, unsigned > (unsigned a, unsigned b)
	Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short. More...

template<typename ValueT >
ValueT	clamp (ValueT val, ValueT min_val, ValueT max_val)
	Returns min(max(val, min_val), max_val) More...

template<typename ValueT >
std::enable_if_t< ValueT::size()==2, ValueT >	isnan (const ValueT a)
	Determine whether 2 element value is NaN. More...

template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT >	cbrt (ValueT val)
	cbrt function wrapper. More...

template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > >	min (ValueT a, ValueU b)

template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > >	min (ValueT a, ValueU b)

sycl::half	min (sycl::half a, sycl::half b)

template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_integral_v< ValueT > &&std::is_integral_v< ValueU >, std::common_type_t< ValueT, ValueU > >	max (ValueT a, ValueU b)

template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT > &&std::is_floating_point_v< ValueU >, std::common_type_t< ValueT, ValueU > >	max (ValueT a, ValueU b)

sycl::half	max (sycl::half a, sycl::half b)

template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU >	fmax_nan (const ValueT a, const ValueU b)
	Performs 2 elements comparison and returns the bigger one. More...

template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 >	fmax_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)

template<typename ValueT , typename ValueU >
std::common_type_t< ValueT, ValueU >	fmin_nan (const ValueT a, const ValueU b)
	Performs 2 elements comparison and returns the smaller one. More...

template<typename ValueT , typename ValueU >
sycl::vec< std::common_type_t< ValueT, ValueU >, 2 >	fmin_nan (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueU, 2 > b)

float	pow (const float a, const int b)

double	pow (const double a, const int b)

template<typename ValueT , typename ValueU >
std::enable_if_t< std::is_floating_point_v< ValueT >, ValueT >	pow (const ValueT a, const ValueU b)

template<typename ValueT , typename ValueU >
std::enable_if_t<!std::is_floating_point_v< ValueT >, double >	pow (const ValueT a, const ValueU b)

template<typename ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT >	relu (const ValueT a)
	Performs relu saturation. More...

template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::vec< ValueT, 2 > >	relu (const sycl::vec< ValueT, 2 > a)

template<class ValueT >
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, sycl::marray< ValueT, 2 > >	relu (const sycl::marray< ValueT, 2 > a)

template<typename T >
sycl::vec< T, 2 >	cmul (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
	Computes the multiplication of two complex numbers. More...

template<typename T >
sycl::vec< T, 2 >	cdiv (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
	Computes the division of two complex numbers. More...

template<typename T >
T	cabs (sycl::vec< T, 2 > x)
	Computes the magnitude of a complex number. More...

template<typename T >
sycl::vec< T, 2 >	conj (sycl::vec< T, 2 > x)
	Computes the complex conjugate of a complex number. More...

template<typename ValueT >
sycl::vec< ValueT, 2 >	cmul_add (const sycl::vec< ValueT, 2 > a, const sycl::vec< ValueT, 2 > b, const sycl::vec< ValueT, 2 > c)
	Performs complex number multiply addition. More...

template<typename ValueT >
sycl::marray< ValueT, 2 >	cmul_add (const sycl::marray< ValueT, 2 > a, const sycl::marray< ValueT, 2 > b, const sycl::marray< ValueT, 2 > c)

template<typename VecT , class BinaryOperation >
unsigned	vectorized_binary (unsigned a, unsigned b, const BinaryOperation binary_op)
	Compute vectorized binary operation value for two values, with each value treated as a vector type `VecT`. More...

template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 >	dp2a_lo (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
	Two-way dot product-accumulate. More...

template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 >	dp2a_hi (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
	Two-way dot product-accumulate. More...

template<typename T1 , typename T2 >
dot_product_acc_t< T1, T2 >	dp4a (T1 a, T2 b, dot_product_acc_t< T1, T2 > c)
	Four-way byte dot product-accumulate. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_add (AT a, BT b)
	Extend `a` and `b` to 33 bit and add them. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_add (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, add `a`, `b`, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_add_sat (AT a, BT b)
	Extend `a` and `b` to 33 bit and add them with saturation. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_add_sat (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, add `a`, `b` with saturation, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_sub (AT a, BT b)
	Extend `a` and `b` to 33 bit and subtract them. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_sub (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, subtract `a`, `b`, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_sub_sat (AT a, BT b)
	Extend `a` and `b` to 33 bit and subtract them with saturation. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_sub_sat (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, subtract `a`, `b` with saturation, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_absdiff (AT a, BT b)
	Extend `a` and `b` to 33 bit and do abs_diff. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_absdiff (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, abs_diff `a`, `b`, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_absdiff_sat (AT a, BT b)
	Extend `a` and `b` to 33 bit and do abs_diff with saturation. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_absdiff_sat (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, abs_diff `a`, `b` with saturation, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_min (AT a, BT b)
	Extend `a` and `b` to 33 bit and return smaller one. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_min (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, find the smaller one in `a`, `b`, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_min_sat (AT a, BT b)
	Extend `a` and `b` to 33 bit and return smaller one with saturation. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_min_sat (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, find the smaller one in `a`, `b` with saturation, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_max (AT a, BT b)
	Extend `a` and `b` to 33 bit and return bigger one. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_max (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, find the bigger one in `a`, `b`, then do `second_op` with `c`. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_max_sat (AT a, BT b)
	Extend `a` and `b` to 33 bit and return bigger one with saturation. More...

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >
constexpr RetT	extend_max_sat (AT a, BT b, CT c, BinaryOperation second_op)
	Extend Inputs to 33 bit, find the bigger one in `a`, `b` with saturation, then do `second_op` with `c`. More...

template<typename RetT , typename T >
constexpr RetT	extend_shl_clamp (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return a << clamp(b, 0, 32). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shl_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(a << clamp(b, 0, 32), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shl_sat_clamp (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return sat(a << clamp(b, 0, 32)). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shl_sat_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(sat(a << clamp(b, 0, 32)), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shl_wrap (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return a << (b & 0x1F). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shl_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(a << (b & 0x1F), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shl_sat_wrap (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return sat(a << (b & 0x1F)). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shl_sat_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(sat(a << (b & 0x1F)), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shr_clamp (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return a >> clamp(b, 0, 32). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shr_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(a >> clamp(b, 0, 32), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shr_sat_clamp (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return sat(a >> clamp(b, 0, 32)). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shr_sat_clamp (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(sat(a >> clamp(b, 0, 32)), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shr_wrap (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return a >> (b & 0x1F). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shr_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(a >> (b & 0x1F), c). More...

template<typename RetT , typename T >
constexpr RetT	extend_shr_sat_wrap (T a, uint32_t b)
	Extend `a` and `b` to 33 bit and return sat(a >> (b & 0x1F)). More...

template<typename RetT , typename T , typename BinaryOperation >
constexpr RetT	extend_shr_sat_wrap (T a, uint32_t b, uint32_t c, BinaryOperation second_op)
	Extend Inputs to 33 bit, and return second_op(sat(a >> (b & 0x1F)), c). More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd2 (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd2_add (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd2_sat (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub2 (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub2_add (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub2_sat (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff2 (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff2_add (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff2_sat (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin2 (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin2_add (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin2_sat (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax2 (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax2_add (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax2_sat (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg2 (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg2_add (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b`, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg2_sat (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b` with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit. More...

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned	extend_vcompare2 (AT a, BT b, BinaryOperation cmp)
	Extend `a` and `b` to 33 bit and vectorized compare input values using specified comparison `cmp` . More...

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned	extend_vcompare2_add (AT a, BT b, unsigned c, BinaryOperation cmp)
	Extend Inputs to 33 bit, and vectorized compare input values using specified comparison `cmp` , then add the result with `c` . More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd4 (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd4_add (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vadd4_sat (AT a, BT b, RetT c)
	Compute vectorized addition of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub4 (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub4_add (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vsub4_sat (AT a, BT b, RetT c)
	Compute vectorized subtraction of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff4 (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff4_add (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vabsdiff4_sat (AT a, BT b, RetT c)
	Compute vectorized abs_diff of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin4 (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin4_add (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmin4_sat (AT a, BT b, RetT c)
	Compute vectorized minimum of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax4 (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax4_add (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vmax4_sat (AT a, BT b, RetT c)
	Compute vectorized maximum of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg4 (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg4_add (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b`, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename RetT , typename AT , typename BT >
constexpr RetT	extend_vavrg4_sat (AT a, BT b, RetT c)
	Compute vectorized average of `a` and `b` with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit. More...

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned	extend_vcompare4 (AT a, BT b, BinaryOperation cmp)
	Extend `a` and `b` to 33 bit and vectorized compare input values using specified comparison `cmp` . More...

template<typename AT , typename BT , typename BinaryOperation >
constexpr unsigned	extend_vcompare4_add (AT a, BT b, unsigned c, BinaryOperation cmp)
	Extend Inputs to 33 bit, and vectorized compare input values using specified comparison `cmp` , then add the result with `c` . More...

template<typename AllocT >
auto *	local_mem ()

static void *	malloc (size_t num_bytes, sycl::queue q=get_default_queue())
	Allocate memory block on the device. More...

static void *	malloc_host (size_t num_bytes, sycl::queue q=get_default_queue())
	Allocate memory block on the host. More...

static void *	malloc_shared (size_t num_bytes, sycl::queue q=get_default_queue())
	Allocate memory block of usm_shared memory. More...

static pitched_data	malloc (sycl::range< 3 > size, sycl::queue q=get_default_queue())
	Allocate memory block for 3D array on the device. More...

static void *	malloc (size_t &pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
	Allocate memory block for 2D array on the device. More...

static void	wait_and_free (void *ptr, sycl::queue q=get_default_queue())
	Wait on the queue `q` and free the memory `ptr`. More...

static void	free (void *ptr, sycl::queue q=get_default_queue())
	Free the memory `ptr` on the default queue without synchronizing. More...

sycl::event	enqueue_free (const std::vector< void * > &pointers, const std::vector< sycl::event > &events, sycl::queue q=get_default_queue())
	Enqueues the release of all pointers in `pointers` on the queue `q`. More...

static void	memcpy (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
	Synchronously copies `size` bytes from the address specified by `from_ptr` to the address specified by `to_ptr`. More...

static sycl::event	memcpy_async (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
	Asynchronously copies `size` bytes from the address specified by `from_ptr` to the address specified by `to_ptr`. More...

template<typename T >
static sycl::event	memcpy_async (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
	Asynchronously copies `count` T's from the address specified by `from_ptr` to the address specified by `to_ptr`. More...

template<typename T >
static void	memcpy (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
	Synchronously copies `count` T's from the address specified by `from_ptr` to the address specified by `to_ptr`. More...

static void	memcpy (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
	Synchronously copies 2D matrix specified by `x` and `y` from the address specified by `from_ptr` to the address specified by `to_ptr`, while `from_pitch` and `to_pitch` are the range of dim x in bytes of the matrix specified by `from_ptr` and `to_ptr`. More...

static sycl::event	memcpy_async (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
	Asynchronously copies 2D matrix specified by `x` and `y` from the address specified by `from_ptr` to the address specified by `to_ptr`, while `from_pitch` and `to_pitch` are the range of dim x in bytes of the matrix specified by `from_ptr` and `to_ptr`. More...

static void	memcpy (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
	Synchronously copies a subset of a 3D matrix specified by `from` to another 3D matrix specified by `to`. More...

static sycl::event	memcpy_async (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
	Asynchronously copies a subset of a 3D matrix specified by `from` to another 3D matrix specified by `to`. More...

template<class T >
static void	fill (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
	Synchronously sets `pattern` to the first `count` elements starting from `dev_ptr`. More...

template<class T >
static sycl::event	fill_async (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
	Asynchronously sets `pattern` to the first `count` elements starting from `dev_ptr`. More...

static void	memset (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
	Synchronously sets `value` to the first `size` bytes starting from `dev_ptr`. More...

static void	memset_d16 (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
	Sets 2 bytes data `value` to the first `size` elements starting from `dev_ptr` in `q` synchronously. More...

static void	memset_d32 (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
	Sets 4 bytes data `value` to the first `size` elements starting from `dev_ptr` in `q` synchronously. More...

static sycl::event	memset_async (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
	Sets 1 byte data `value` to the first `size` elements starting from `dev_ptr` in `q` asynchronously. More...

static sycl::event	memset_d16_async (void *dev_ptr, unsigned short value, size_t size, sycl::queue q=get_default_queue())
	Sets 2 bytes data `value` to the first `size` elements starting from `dev_ptr` in `q` asynchronously. More...

static sycl::event	memset_d32_async (void *dev_ptr, unsigned int value, size_t size, sycl::queue q=get_default_queue())
	Sets 4 bytes data `value` to the first `size` elements starting from `dev_ptr` in `q` asynchronously. More...

static void	memset (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 1 byte data `val` to the pitched 2D memory region pointed by `ptr` in `q` synchronously. More...

static void	memset_d16 (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 2 bytes data `val` to the pitched 2D memory region pointed by ptr in `q` synchronously. More...

static void	memset_d32 (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 4 bytes data `val` to the pitched 2D memory region pointed by ptr in `q` synchronously. More...

static sycl::event	memset_async (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 1 byte data `val` to the pitched 2D memory region pointed by `ptr` in `q` asynchronously. More...

static sycl::event	memset_d16_async (void *ptr, size_t pitch, unsigned short val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 2 bytes data `val` to the pitched 2D memory region pointed by `ptr` in `q` asynchronously. More...

static sycl::event	memset_d32_async (void *ptr, size_t pitch, unsigned int val, size_t x, size_t y, sycl::queue q=get_default_queue())
	Sets 4 bytes data `val` to the pitched 2D memory region pointed by `ptr` in `q` asynchronously. More...

static void	memset (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
	Sets `val` to the 3D memory region specified by `pitch` in `q` synchronously. More...

static sycl::event	memset_async (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
	Sets `val` to the 3D memory region specified by `pitch` in `q` asynchronously. More...

int	cast_double_to_int (double d, bool use_high32=true)
	Cast the high or low 32 bits of a double to an integer. More...

double	cast_ints_to_double (int high32, int low32)
	Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double. More...

template<typename T >
T	reverse_bits (T a)
	Reverse the bit order of an unsigned integer. More...

unsigned int	byte_level_permute (unsigned int a, unsigned int b, unsigned int s)

template<typename T >
int	ffs (T a)
	Find position of first least significant set bit in an integer. More...

template<typename T >
T	select_from_sub_group (sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32)
	select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group. More...

template<typename T >
T	shift_sub_group_left (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
	shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left. More...

template<typename T >
T	shift_sub_group_right (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
	shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right. More...

template<typename T >
T	permute_sub_group_by_xor (sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32)
	permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask. More...

int	get_sycl_language_version ()
	Inherited from the original SYCLomatic compatibility headers. More...

template<typename T >
unsigned int	match_any_over_sub_group (sycl::sub_group g, unsigned member_mask, T value)
	The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group. More...

template<typename T >
unsigned int	match_all_over_sub_group (sycl::sub_group g, unsigned member_mask, T value, int *pred)
	The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group. More...

queue_ptr	int_as_queue_ptr (uintptr_t x)
	If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr. More...

Typedef Documentation

◆ arith_t

template<typename T >

using syclcompat::arith_t = typedef typename arith<T>::type

Definition at line 42 of file traits.hpp.

◆ byte_t

using syclcompat::byte_t = typedef uint8_t

Definition at line 98 of file memory.hpp.

◆ constant_memory

template<class T , size_t Dimension>

using syclcompat::constant_memory = typedef device_memory<T, memory_region::constant, Dimension>

Definition at line 1282 of file memory.hpp.

◆ device_ptr

using syclcompat::device_ptr = typedef char *

Definition at line 116 of file device.hpp.

◆ dot_product_acc_t

template<typename T1 , typename T2 >

using syclcompat::dot_product_acc_t = typedef std::conditional_t<std::is_unsigned_v<T1> && std::is_unsigned_v<T2>, uint32_t, int32_t>

Definition at line 996 of file math.hpp.

◆ err0

using syclcompat::err0 = typedef detail::generic_error_type<struct err0_tag, int>

Definition at line 133 of file util.hpp.

◆ err1

using syclcompat::err1 = typedef detail::generic_error_type<struct err1_tag, int>

Definition at line 134 of file util.hpp.

◆ event_ptr

using syclcompat::event_ptr = typedef sycl::event *

Definition at line 112 of file device.hpp.

◆ global_memory

template<class T , size_t Dimension>

using syclcompat::global_memory = typedef device_memory<T, memory_region::global, Dimension>

Definition at line 1280 of file memory.hpp.

◆ kernel_functor

typedef void(* syclcompat::kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)

Definition at line 59 of file kernel.hpp.

◆ queue_ptr

using syclcompat::queue_ptr = typedef sycl::queue *

Definition at line 114 of file device.hpp.

◆ shared_memory

template<class T , size_t Dimension>

using syclcompat::shared_memory = typedef device_memory<T, memory_region::usm_shared, Dimension>

Definition at line 1284 of file memory.hpp.

◆ type_identity_t

template<class T >

using syclcompat::type_identity_t = typedef typename type_identity<T>::type

Definition at line 35 of file traits.hpp.

Enumeration Type Documentation

◆ error_code

enum syclcompat::error_code

Enumerator
SUCCESS
BACKEND_ERROR
DEFAULT_ERROR

Definition at line 70 of file defs.hpp.

◆ memory_region

enum syclcompat::memory_region

strong

Enumerator
global
constant
local
usm_shared

Definition at line 89 of file memory.hpp.

◆ target

enum syclcompat::target

strong

Enumerator
device
local

Definition at line 96 of file memory.hpp.

Function Documentation

◆ atomic_compare_exchange_strong() [1/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_compare_exchange_strong	(	sycl::multi_ptr< T, addressSpace >	addr,
		type_identity_t< T >	expected,
		type_identity_t< T >	desired,
		sycl::memory_order	success = `sycl::memory_order::relaxed`,
		sycl::memory_order	fail = `sycl::memory_order::relaxed`
	)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters

[in,out]	addr	Multi_ptr.
	expected	The value to compare against the value at `addr`.
	desired	The value to assign to `addr` if the value at `addr` is expected.
	success	The memory ordering used when comparison succeeds.
	fail	The memory ordering used when comparison fails.

Returns: The value at the addr before the call.

Definition at line 253 of file atomic.hpp.

◆ atomic_compare_exchange_strong() [2/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_compare_exchange_strong	(	T *	addr,
		type_identity_t< T >	expected,
		type_identity_t< T >	desired,
		sycl::memory_order	success = `sycl::memory_order::relaxed`,
		sycl::memory_order	fail = `sycl::memory_order::relaxed`
	)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters

[in]	addr	The pointer to the data.
	expected	The value to compare against the value at `addr`.
	desired	The value to assign to `addr` if the value at `addr` is expected.
	success	The memory ordering used when comparison succeeds.
	fail	The memory ordering used when comparison fails.

Returns: The value at the addr before the call.

Definition at line 279 of file atomic.hpp.

◆ atomic_exchange()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_exchange	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically exchange the value at the address addr with the value operand.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to be exchanged with the value pointed by `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 232 of file atomic.hpp.

◆ atomic_fetch_add()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_add	(	T *	addr,
		arith_t< T >	operand
	)

inline

Atomically add the value operand to the value at the addr and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to add to the value at `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 56 of file atomic.hpp.

◆ atomic_fetch_and()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_and	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to use in bitwise AND operation with the value at the `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 91 of file atomic.hpp.

◆ atomic_fetch_compare_dec()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>

unsigned int syclcompat::atomic_fetch_compare_dec	(	unsigned int *	addr,
		unsigned int	operand
	)

Atomically store operand at addr if the old value stored at addr is equal to zero or greater than operand; otherwise decrement the value stored at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The threshold value.
	memoryOrder	The memory ordering used.

Returns: The old value stored in addr.

Definition at line 176 of file atomic.hpp.

◆ atomic_fetch_compare_inc()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>

unsigned int syclcompat::atomic_fetch_compare_inc	(	unsigned int *	addr,
		unsigned int	operand
	)

inline

Atomically increment the value stored at addr if the old value stored at addr is less than operand; otherwise store 0 at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The threshold value.
	memoryOrder	The memory ordering used.

Returns: The old value stored in addr.

Definition at line 205 of file atomic.hpp.

◆ atomic_fetch_max()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_max	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to compare with the value at `addr` when computing the maximum.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 160 of file atomic.hpp.

◆ atomic_fetch_min()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_min	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to compare with the value at `addr` when computing the minimum.

Definition at line 143 of file atomic.hpp.

◆ atomic_fetch_or()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_or	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically or the value at the addr with the value operand, and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to use in bitwise OR operation with the value at the `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 109 of file atomic.hpp.

◆ atomic_fetch_sub()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_sub	(	T *	addr,
		arith_t< T >	operand
	)

inline

Atomically subtract the value operand from the value at the addr and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to subtract from the value at `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 73 of file atomic.hpp.

◆ atomic_fetch_xor()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >

T syclcompat::atomic_fetch_xor	(	T *	addr,
		type_identity_t< T >	operand
	)

inline

Atomically xor the value at the addr with the value operand, and assign the result to the value at addr.

Parameters

[in,out]	addr	The pointer to the data.
	operand	The value to use in bitwise XOR operation with the value at the `addr`.
	memoryOrder	The memory ordering used.

Returns: The value at the addr before the call.

Definition at line 127 of file atomic.hpp.

◆ bfe_safe()

template<typename T >

T syclcompat::bfe_safe	(	const T	source,
		const uint32_t	bit_start,
		const uint32_t	num_bits
	)

inline

Bitfield-extract with boundary checking.

Extract bit field from `source` and return the zero or sign-extended result. `bit_start` gives the bit field starting bit position, and `num_bits` gives the bit field length in bits.

The result is padded with the sign bit of the extracted field. If num_bits is zero, the result is zero. If the start position is beyond the msb of the input, the result is filled with the replicated sign bit of the extracted field.

Template Parameters

T	The type of `source` value, must be an integer.

Parameters

source	The source value to extract from.
bit_start	The position to start extracting.
num_bits	The number of bits to extract.

Definition at line 266 of file math.hpp.

References syclcompat::detail::bfe().

◆ bfi_safe()

template<typename T >

T syclcompat::bfi_safe	(	const T	x,
		const T	y,
		const uint32_t	bit_start,
		const uint32_t	num_bits
	)

inline

Bitfield-insert with boundary checking.

Align and insert a bit field from `x` into `y`. `bit_start` gives the starting bit position for the insertion, and `num_bits` gives the bit field length in bits.

Template Parameters

T	The type of `x` and `y`, must be an unsigned integer.

Parameters

x	The source of the bitfield.
y	The source where bitfield is inserted.
bit_start	The position to start insertion.
num_bits	The number of bits to insert.

Definition at line 357 of file math.hpp.

References syclcompat::detail::bfi().

◆ byte_level_permute()

unsigned int syclcompat::byte_level_permute	(	unsigned int	a,
		unsigned int	b,
		unsigned int	s
	)

inline

Parameters

[in]	a	The first value contains 4 bytes
[in]	b	The second value contains 4 bytes
[in]	s	The selector value, only lower 16bit used

Returns: the permutation result of 4 bytes selected in the way specified by s from a and b

Definition at line 182 of file util.hpp.

◆ cabs()

template<typename T >

T syclcompat::cabs ( sycl::vec< T, 2 > x )

Computes the magnitude of a complex number.

Template Parameters

T	Complex element type

Parameters

[in] x The input complex number

Returns: The result

Definition at line 849 of file math.hpp.

References sycl::_V1::ext::intel::esimd::abs().

◆ cast_double_to_int()

int syclcompat::cast_double_to_int	(	double	d,
		bool	use_high32 = `true`
	)

inline

Cast the high or low 32 bits of a double to an integer.

Parameters

[in]	d	The double value.
[in]	use_high32	Cast the high 32 bits of the double if true; otherwise cast the low 32 bits.

Definition at line 140 of file util.hpp.

◆ cast_ints_to_double()

double syclcompat::cast_ints_to_double	(	int	high32,
		int	low32
	)

inline

Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double.

Parameters

[in]	high32	The integer as the high 32 bits
[in]	low32	The integer as the low 32 bits

Definition at line 152 of file util.hpp.

◆ cbrt()

template<typename ValueT >

std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::cbrt ( ValueT val )

inline

cbrt function wrapper.

Definition at line 692 of file math.hpp.

◆ cdiv()

template<typename T >

sycl::vec<T, 2> syclcompat::cdiv	(	sycl::vec< T, 2 >	x,
		sycl::vec< T, 2 >	y
	)

Computes the division of two complex numbers.

Template Parameters

T	Complex element type

Parameters

[in]	x	The first input complex number
[in]	y	The second input complex number

Returns: The result

Definition at line 839 of file math.hpp.

◆ clamp()

template<typename ValueT >

ValueT syclcompat::clamp	(	ValueT	val,
		ValueT	min_val,
		ValueT	max_val
	)

inline

Returns min(max(val, min_val), max_val)

Parameters

[in]	val	The input value
[in]	min_val	The minimum value
[in]	max_val	The maximum value

Returns: the value between min_val and max_val

Definition at line 675 of file math.hpp.

References syclcompat::detail::clamp().

◆ cmul()

template<typename T >

sycl::vec<T, 2> syclcompat::cmul	(	sycl::vec< T, 2 >	x,
		sycl::vec< T, 2 >	y
	)

Computes the multiplication of two complex numbers.

Template Parameters

T	Complex element type

Parameters

[in]	x	The first input complex number
[in]	y	The second input complex number

Returns: The result

Definition at line 827 of file math.hpp.

◆ cmul_add() [1/2]

template<typename ValueT >

sycl::marray<ValueT, 2> syclcompat::cmul_add	(	const sycl::marray< ValueT, 2 >	a,
		const sycl::marray< ValueT, 2 >	b,
		const sycl::marray< ValueT, 2 >	c
	)

inline

Definition at line 880 of file math.hpp.

◆ cmul_add() [2/2]

template<typename ValueT >

sycl::vec<ValueT, 2> syclcompat::cmul_add	(	const sycl::vec< ValueT, 2 >	a,
		const sycl::vec< ValueT, 2 >	b,
		const sycl::vec< ValueT, 2 >	c
	)

inline

Performs complex number multiply addition.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: the operation result

Definition at line 870 of file math.hpp.

◆ compare() [1/3]

template<typename ValueT , class BinaryOperation >

std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::compare	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs comparison.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 463 of file math.hpp.

Referenced by compare(), compare_both(), and compare_mask().

◆ compare() [2/3]

template<typename ValueT , class BinaryOperation >

std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::compare	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs 2 element comparison.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 482 of file math.hpp.

References compare().

◆ compare() [3/3]

template<typename ValueT >

std::enable_if_t< std::is_same_v<std::invoke_result_t<std::not_equal_to<>, ValueT, ValueT>, bool>, bool> syclcompat::compare	(	const ValueT	a,
		const ValueT	b,
		const std::not_equal_to<>	binary_op
	)

inline

Definition at line 471 of file math.hpp.

References syclcompat::detail::isnan().

◆ compare_both()

template<typename ValueT , class BinaryOperation >

std::enable_if_t<ValueT::size() == 2, bool> syclcompat::compare_both	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs 2 element comparison and return true if both results are true.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 520 of file math.hpp.

References compare().

◆ compare_mask()

template<typename ValueT , class BinaryOperation >

unsigned syclcompat::compare_mask	(	const sycl::vec< ValueT, 2 >	a,
		const sycl::vec< ValueT, 2 >	b,
		const BinaryOperation	binary_op
	)

inline

Performs a 2-element comparison. The compare result of each element is 0 (false) or 0xffff (true); returns an unsigned int composed of the compare results of the two elements.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 546 of file math.hpp.

References compare().

◆ compute_nd_range() [1/2]

sycl::nd_range<1> syclcompat::compute_nd_range	(	int	global_size_in,
		int	work_group_size
	)

inline

Definition at line 110 of file launch.hpp.

References sycl::_V1::ext::oneapi::experimental::work_group_size.

◆ compute_nd_range() [2/2]

template<int Dim>

sycl::nd_range<Dim> syclcompat::compute_nd_range	(	sycl::range< Dim >	global_size_in,
		sycl::range< Dim >	work_group_size
	)

inline

Definition at line 93 of file launch.hpp.

References sycl::_V1::range< Dimensions >::size(), and sycl::_V1::ext::oneapi::experimental::work_group_size.

◆ conj()

template<typename T >

sycl::vec<T, 2> syclcompat::conj ( sycl::vec< T, 2 > x )

Computes the complex conjugate of a complex number.

Template Parameters

T	Complex element type

Parameters

[in] x The input complex number

Returns: The result

Definition at line 858 of file math.hpp.

◆ cpu_device()

static device_ext& syclcompat::cpu_device ( )

inlinestatic

Util function to get a CPU device.

Definition at line 916 of file device.hpp.

References syclcompat::detail::dev_mgr::cpu_device(), and syclcompat::detail::dev_mgr::instance().

◆ create_queue()

static sycl::queue syclcompat::create_queue	(	bool	print_on_async_exceptions = `false`,
		bool	in_order = `true`
	)

inlinestatic

Definition at line 864 of file device.hpp.

References syclcompat::device_ext::create_queue(), syclcompat::detail::dev_mgr::current_device(), and syclcompat::detail::dev_mgr::instance().

◆ destroy_event()

static void syclcompat::destroy_event ( event_ptr event )

static

Destroy the memory pointed to by event.

Parameters

event Pointer to the sycl::event address.

Definition at line 121 of file device.hpp.

◆ device_count()

static unsigned int syclcompat::device_count ( )

inlinestatic

Definition at line 949 of file device.hpp.

References syclcompat::detail::dev_mgr::device_count(), and syclcompat::detail::dev_mgr::instance().

◆ dp2a_hi()

template<typename T1 , typename T2 >

dot_product_acc_t<T1, T2> syclcompat::dp2a_hi	(	T1	a,
		T2	b,
		dot_product_acc_t< T1, T2 >	c
	)

inline

Two-way dot product-accumulate.

Calculate and return integer_vector2(a) dot product integer_vector2(high_16bit(b)) + c.

Template Parameters

[in]	T1 The type of first value.
[in]	T2 The type of second value.

Parameters

[in]	a	The first value.
[in]	b	The second value.
[in]	c	The third value. It has type uint32_t if both T1 and T2 are unsigned; otherwise it has type int32_t.

Returns: Two-way 16-bit to 8-bit dot product which is accumulated in 32-bit result.

Definition at line 1077 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend2(), and syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ dp2a_lo()

template<typename T1 , typename T2 >

dot_product_acc_t<T1, T2> syclcompat::dp2a_lo	(	T1	a,
		T2	b,
		dot_product_acc_t< T1, T2 >	c
	)

inline

Two-way dot product-accumulate.

Calculate and return integer_vector2(a) dot product integer_vector2(low16_bit(b)) + c.

Template Parameters

[in]	T1 The type of first value.
[in]	T2 The type of second value.

Parameters

[in]	a	The first value.
[in]	b	The second value.
[in]	c	The third value. It has type uint32_t if both T1 and T2 are unsigned; otherwise it has type int32_t.

Returns: Two-way 16-bit to 8-bit dot product which is accumulated in 32-bit result.

Definition at line 1030 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend2(), and syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ dp4a()

template<typename T1 , typename T2 >

dot_product_acc_t<T1, T2> syclcompat::dp4a	(	T1	a,
		T2	b,
		dot_product_acc_t< T1, T2 >	c
	)

inline

Four-way byte dot product-accumulate.

Calculate and return integer_vector4(a) dot product integer_vector4(b) + c.

Template Parameters

[in]	T1 The type of first value.
[in]	T2 The type of second value.

Parameters

[in]	a	The first value.
[in]	b	The second value.
[in]	c	The third value. It has type uint32_t if both T1 and T2 are unsigned; otherwise it has type int32_t.

Returns: Four-way byte dot product which is accumulated in 32-bit result.

Definition at line 1123 of file math.hpp.

References syclcompat::detail::extract_and_sign_or_zero_extend4().

◆ enqueue_free()

sycl::event syclcompat::enqueue_free	(	const std::vector< void * > &	pointers,
		const std::vector< sycl::event > &	events,
		sycl::queue	q = `get_default_queue()`
	)

inline

Enqueues the release of all pointers in pointers on the queue q.

The command waits on all passed events and returns an event that tracks the command's execution on the queue.

Parameters

pointers	The pointers point to the device memory requested to be freed.
events	The events to be waited on.
q	The sycl::queue the memory relates to.

Definition at line 647 of file memory.hpp.

◆ extend_absdiff() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_absdiff	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and do abs_diff.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend abs_diff of the two values

Definition at line 1289 of file math.hpp.

◆ extend_absdiff() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_absdiff	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend abs_diff of a, b and second_op with c

Definition at line 1307 of file math.hpp.

◆ extend_absdiff_sat() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_absdiff_sat	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and do abs_diff with saturation.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend abs_diff of the two values with saturation

Definition at line 1320 of file math.hpp.

◆ extend_absdiff_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_absdiff_sat	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, abs_diff a, b with saturation, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend abs_diff of a, b with saturation and second_op with c

Definition at line 1339 of file math.hpp.

◆ extend_add() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_add	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and add them.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend addition of the two values

Definition at line 1167 of file math.hpp.

◆ extend_add() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_add	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, add a, b, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend addition of a, b and second_op with c

Definition at line 1184 of file math.hpp.

◆ extend_add_sat() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_add_sat	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and add them with saturation.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend addition of the two values with saturation

Definition at line 1196 of file math.hpp.

◆ extend_add_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_add_sat	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, add a, b with saturation, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend addition of a, b with saturation and second_op with c

Definition at line 1215 of file math.hpp.

◆ extend_max() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_max	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and return bigger one.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The bigger one of the two extended values

Definition at line 1414 of file math.hpp.

◆ extend_max() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_max	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The bigger one of a, b and second_op with c

Definition at line 1432 of file math.hpp.

◆ extend_max_sat() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_max_sat	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and return bigger one with saturation.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The bigger one of the two extended values with saturation

Definition at line 1444 of file math.hpp.

◆ extend_max_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_max_sat	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, find the bigger one in a, b with saturation, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The bigger one of a, b with saturation and second_op with c

Definition at line 1463 of file math.hpp.

◆ extend_min() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_min	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and return smaller one.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The smaller one of the two extended values

Definition at line 1352 of file math.hpp.

◆ extend_min() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_min	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The smaller one of a, b and second_op with c

Definition at line 1370 of file math.hpp.

◆ extend_min_sat() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_min_sat	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and return smaller one with saturation.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The smaller one of the two extended values with saturation

Definition at line 1382 of file math.hpp.

◆ extend_min_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_min_sat	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, find the smaller one in a, b with saturation, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The smaller one of a, b with saturation and second_op with c

Definition at line 1401 of file math.hpp.

◆ extend_shl_clamp() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shl_clamp	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return a << clamp(b, 0, 32).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: a << clamp(b, 0, 32)

Definition at line 1473 of file math.hpp.

◆ extend_shl_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shl_clamp	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a << clamp(b, 0, 32), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(a << clamp(b, 0, 32), c)

Definition at line 1485 of file math.hpp.

◆ extend_shl_sat_clamp() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shl_sat_clamp	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return sat(a << clamp(b, 0, 32)).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: sat(a << clamp(b, 0, 32))

Definition at line 1496 of file math.hpp.

◆ extend_shl_sat_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shl_sat_clamp	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a << clamp(b, 0, 32)), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(sat(a << clamp(b, 0, 32)), c)

Definition at line 1508 of file math.hpp.

◆ extend_shl_sat_wrap() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shl_sat_wrap	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return sat(a << (b & 0x1F)).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: sat(a << (b & 0x1F))

Definition at line 1541 of file math.hpp.

◆ extend_shl_sat_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shl_sat_wrap	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a << (b & 0x1F)), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(sat(a << (b & 0x1F)), c)

Definition at line 1552 of file math.hpp.

◆ extend_shl_wrap() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shl_wrap	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return a << (b & 0x1F).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: a << (b & 0x1F)

Definition at line 1519 of file math.hpp.

◆ extend_shl_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shl_wrap	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a << (b & 0x1F), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(a << (b & 0x1F), c)

Definition at line 1530 of file math.hpp.

◆ extend_shr_clamp() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shr_clamp	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return a >> clamp(b, 0, 32).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: a >> clamp(b, 0, 32)

Definition at line 1563 of file math.hpp.

◆ extend_shr_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shr_clamp	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a >> clamp(b, 0, 32), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(a >> clamp(b, 0, 32), c)

Definition at line 1575 of file math.hpp.

◆ extend_shr_sat_clamp() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shr_sat_clamp	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return sat(a >> clamp(b, 0, 32)).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: sat(a >> clamp(b, 0, 32))

Definition at line 1586 of file math.hpp.

◆ extend_shr_sat_clamp() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shr_sat_clamp	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a >> clamp(b, 0, 32)), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(sat(a >> clamp(b, 0, 32)), c)

Definition at line 1598 of file math.hpp.

◆ extend_shr_sat_wrap() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shr_sat_wrap	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return sat(a >> (b & 0x1F)).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: sat(a >> (b & 0x1F))

Definition at line 1631 of file math.hpp.

◆ extend_shr_sat_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shr_sat_wrap	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(sat(a >> (b & 0x1F)), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(sat(a >> (b & 0x1F)), c)

Definition at line 1642 of file math.hpp.

◆ extend_shr_wrap() [1/2]

template<typename RetT , typename T >

constexpr RetT syclcompat::extend_shr_wrap	(	T	a,
		uint32_t	b
	)

inlineconstexpr

Extend a and b to 33 bit and return a >> (b & 0x1F).

Parameters

[in]	a	The source value
[in]	b	The offset to shift

Returns: a >> (b & 0x1F)

Definition at line 1609 of file math.hpp.

◆ extend_shr_wrap() [2/2]

template<typename RetT , typename T , typename BinaryOperation >

constexpr RetT syclcompat::extend_shr_wrap	(	T	a,
		uint32_t	b,
		uint32_t	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, and return second_op(a >> (b & 0x1F), c).

Parameters

[in]	a	The source value
[in]	b	The offset to shift
[in]	c	The value to merge
[in]	second_op	The operation to do with the third value

Returns: second_op(a >> (b & 0x1F), c)

Definition at line 1620 of file math.hpp.

◆ extend_sub() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_sub	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and subtract them.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend subtraction of the two values

Definition at line 1228 of file math.hpp.

◆ extend_sub() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_sub	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, subtract a, b, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend subtraction of a, b and second_op with c

Definition at line 1245 of file math.hpp.

◆ extend_sub_sat() [1/2]

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_sub_sat	(	AT	a,
		BT	b
	)

inlineconstexpr

Extend a and b to 33 bit and subtract them with saturation.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The extend subtraction of the two values with saturation

Definition at line 1257 of file math.hpp.

◆ extend_sub_sat() [2/2]

template<typename RetT , typename AT , typename BT , typename CT , typename BinaryOperation >

constexpr RetT syclcompat::extend_sub_sat	(	AT	a,
		BT	b,
		CT	c,
		BinaryOperation	second_op
	)

inlineconstexpr

Extend Inputs to 33 bit, subtract a, b with saturation, then do second_op with c.

Template Parameters

[in]	RetT The type of the return value
[in]	AT The type of the first value
[in]	BT The type of the second value
[in]	CT The type of the third value
[in]	BinaryOperation The type of the second operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	second_op	The operation to do with the third value

Returns: The extend subtraction of a, b with saturation and second_op with c

Definition at line 1276 of file math.hpp.

◆ extend_vabsdiff2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized abs_diff of the two values

Definition at line 1746 of file math.hpp.

◆ extend_vabsdiff2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized abs_diff of the two values and the third value

Definition at line 1762 of file math.hpp.

◆ extend_vabsdiff2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized abs_diff of the two values with saturation

Definition at line 1776 of file math.hpp.

◆ extend_vabsdiff4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized abs_diff of the two values

Definition at line 2044 of file math.hpp.

◆ extend_vabsdiff4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized abs_diff of the two values and the third value

Definition at line 2060 of file math.hpp.

◆ extend_vabsdiff4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vabsdiff4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized abs_diff of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized abs_diff of the two values with saturation

Definition at line 2074 of file math.hpp.

◆ extend_vadd2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized addition of the two values

Definition at line 1658 of file math.hpp.

◆ extend_vadd2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized addition of the two values and the third value

Definition at line 1674 of file math.hpp.

◆ extend_vadd2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized addition of the two values with saturation

Definition at line 1688 of file math.hpp.

◆ extend_vadd4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized addition of the two values

Definition at line 1956 of file math.hpp.

◆ extend_vadd4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized addition of the two values and the third value

Definition at line 1972 of file math.hpp.

◆ extend_vadd4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vadd4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized addition of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized addition of the two values with saturation

Definition at line 1986 of file math.hpp.

◆ extend_vavrg2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized average of the two values

Definition at line 1878 of file math.hpp.

◆ extend_vavrg2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized average of the two values and the third value

Definition at line 1895 of file math.hpp.

◆ extend_vavrg2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized average of the two values with saturation

Definition at line 1909 of file math.hpp.

◆ extend_vavrg4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized average of the two values

Definition at line 2176 of file math.hpp.

◆ extend_vavrg4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized average of the two values and the third value

Definition at line 2193 of file math.hpp.

◆ extend_vavrg4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vavrg4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized average of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized average of the two values with saturation

Definition at line 2207 of file math.hpp.

◆ extend_vcompare2()

template<typename AT , typename BT , typename BinaryOperation >

constexpr unsigned syclcompat::extend_vcompare2	(	AT	a,
		BT	b,
		BinaryOperation	cmp
	)

inlineconstexpr

Extend a and b to 33 bit and perform a vectorized comparison of the input values using the specified comparison cmp.

Template Parameters

[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer
[in]	BinaryOperation The type of the compare operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	cmp	The comparison operator

Returns: The comparison result of the two extended values.

Definition at line 1924 of file math.hpp.

◆ extend_vcompare2_add()

template<typename AT , typename BT , typename BinaryOperation >

constexpr unsigned syclcompat::extend_vcompare2_add	(	AT	a,
		BT	b,
		unsigned	c,
		BinaryOperation	cmp
	)

inlineconstexpr

Extend the inputs to 33 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c.

Template Parameters

[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer
[in]	BinaryOperation The type of the compare operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	cmp	The comparison operator

Returns: The comparison result of the two extended values, added to c.

Definition at line 1941 of file math.hpp.

◆ extend_vcompare4()

template<typename AT , typename BT , typename BinaryOperation >

constexpr unsigned syclcompat::extend_vcompare4	(	AT	a,
		BT	b,
		BinaryOperation	cmp
	)

inlineconstexpr

Extend a and b to 33 bit and perform a vectorized comparison of the input values using the specified comparison cmp.

Template Parameters

[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer
[in]	BinaryOperation The type of the compare operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	cmp	The comparison operator

Returns: The comparison result of the two extended values.

Definition at line 2222 of file math.hpp.

◆ extend_vcompare4_add()

template<typename AT , typename BT , typename BinaryOperation >

constexpr unsigned syclcompat::extend_vcompare4_add	(	AT	a,
		BT	b,
		unsigned	c,
		BinaryOperation	cmp
	)

inlineconstexpr

Extend the inputs to 33 bit, perform a vectorized comparison of the input values using the specified comparison cmp, then add the result to c.

Template Parameters

[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer
[in]	BinaryOperation The type of the compare operation

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value
[in]	cmp	The comparison operator

Returns: The comparison result of the two extended values, added to c.

Definition at line 2239 of file math.hpp.

◆ extend_vmax2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized maximum of the two values

Definition at line 1834 of file math.hpp.

◆ extend_vmax2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized maximum of the two values and the third value

Definition at line 1850 of file math.hpp.

◆ extend_vmax2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized maximum of the two values with saturation

Definition at line 1864 of file math.hpp.

◆ extend_vmax4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized maximum of the two values

Definition at line 2132 of file math.hpp.

◆ extend_vmax4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized maximum of the two values and the third value

Definition at line 2148 of file math.hpp.

◆ extend_vmax4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmax4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized maximum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized maximum of the two values with saturation

Definition at line 2162 of file math.hpp.

◆ extend_vmin2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized minimum of the two values

Definition at line 1790 of file math.hpp.

◆ extend_vmin2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized minimum of the two values and the third value

Definition at line 1806 of file math.hpp.

◆ extend_vmin2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized minimum of the two values with saturation

Definition at line 1820 of file math.hpp.

◆ extend_vmin4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized minimum of the two values

Definition at line 2088 of file math.hpp.

◆ extend_vmin4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized minimum of the two values and the third value

Definition at line 2104 of file math.hpp.

◆ extend_vmin4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vmin4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized minimum of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized minimum of the two values with saturation

Definition at line 2118 of file math.hpp.

◆ extend_vsub2()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub2	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized subtraction of the two values

Definition at line 1702 of file math.hpp.

◆ extend_vsub2_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub2_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized subtraction of the two values and the third value

Definition at line 1718 of file math.hpp.

◆ extend_vsub2_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub2_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b with saturation, with each value treated as a 2 elements vector type and extend each element to 17 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized subtraction of the two values with saturation

Definition at line 1732 of file math.hpp.

◆ extend_vsub4()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub4	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized subtraction of the two values

Definition at line 2000 of file math.hpp.

◆ extend_vsub4_add()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub4_add	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Then add each half of the result and add with c.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The addition of each half of extend vectorized subtraction of the two values and the third value

Definition at line 2016 of file math.hpp.

◆ extend_vsub4_sat()

template<typename RetT , typename AT , typename BT >

constexpr RetT syclcompat::extend_vsub4_sat	(	AT	a,
		BT	b,
		RetT	c
	)

inlineconstexpr

Compute vectorized subtraction of a and b with saturation, with each value treated as a 4 elements vector type and extend each element to 9 bit.

Template Parameters

[in]	RetT The type of the return value, can only be 32 bit integer
[in]	AT The type of the first value, can only be 32 bit integer
[in]	BT The type of the second value, can only be 32 bit integer

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	c	The third value

Returns: The extend vectorized subtraction of the two values with saturation

Definition at line 2030 of file math.hpp.

◆ fast_length()

float syclcompat::fast_length	(	const float *	a,
		int	len
	)

inline

Compute fast_length for variable-length array.

Parameters

[in]	a	The array
[in]	len	Length of the array

Returns: The computed fast_length

Definition at line 411 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

◆ ffs()

template<typename T >

int syclcompat::ffs ( T a )

inline

Find position of first least significant set bit in an integer.

ffs(0) returns 0.

Parameters

[in] a Input integer value

Returns: The position

Definition at line 198 of file util.hpp.

◆ fill()

template<class T >

static void syclcompat::fill	(	void *	dev_ptr,
		const T &	pattern,
		size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Synchronously sets pattern to the first count elements starting from dev_ptr.

The function will return after the fill operation is completed.

Template Parameters

T	Datatype of the value to be set.

Parameters

dev_ptr	Pointer to the device memory address.
pattern	Pattern of type `T` to be set.
count	Number of elements to be set to the pattern.
q	The queue in which the operation is done.

Returns: no return value.

Definition at line 819 of file memory.hpp.

References syclcompat::detail::fill(), and sycl::_V1::event::wait().

◆ fill_async()

template<class T >

static sycl::event syclcompat::fill_async	(	void *	dev_ptr,
		const T &	pattern,
		size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Asynchronously sets pattern to the first count elements starting from dev_ptr.

The return of the function does NOT guarantee the fill operation is completed.

Template Parameters

T	Datatype of the pattern to be set.

Parameters

dev_ptr	Pointer to the device memory address.
pattern	Pattern of type `T` to be set.
count	Number of elements to be set to the pattern.
q	The queue in which the operation is done.

Returns: An event representing the fill operation.

Definition at line 836 of file memory.hpp.

References syclcompat::detail::fill().

◆ filter_device()

static void syclcompat::filter_device ( const std::vector< std::string > & dev_subnames )

inlinestatic

Filter out devices; only keep the devices whose names contain one of the subnames in dev_subnames.

May break device id mapping and change current device. It's better to be called before other SYCLcompat or SYCL APIs.

Definition at line 924 of file device.hpp.

References syclcompat::detail::dev_mgr::filter(), and syclcompat::detail::dev_mgr::instance().

◆ fmax_nan() [1/2]

template<typename ValueT , typename ValueU >

sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmax_nan	(	const sycl::vec< ValueT, 2 >	a,
		const sycl::vec< ValueU, 2 >	b
	)

inline

Definition at line 752 of file math.hpp.

References fmax_nan().

◆ fmax_nan() [2/2]

template<typename ValueT , typename ValueU >

std::common_type_t<ValueT, ValueU> syclcompat::fmax_nan	(	const ValueT	a,
		const ValueU	b
	)

inline

Performs 2 elements comparison and returns the bigger one.

If either of inputs is NaN, then return NaN.

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: the bigger value

Definition at line 743 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmax_nan().

◆ fmin_nan() [1/2]

template<typename ValueT , typename ValueU >

sycl::vec<std::common_type_t<ValueT, ValueU>, 2> syclcompat::fmin_nan	(	const sycl::vec< ValueT, 2 >	a,
		const sycl::vec< ValueU, 2 >	b
	)

inline

Definition at line 771 of file math.hpp.

References fmin_nan().

◆ fmin_nan() [2/2]

template<typename ValueT , typename ValueU >

std::common_type_t<ValueT, ValueU> syclcompat::fmin_nan	(	const ValueT	a,
		const ValueU	b
	)

inline

Performs 2 elements comparison and returns the smaller one.

If either of inputs is NaN, then return NaN.

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: the smaller value

Definition at line 762 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by fmin_nan().

◆ free()

static void syclcompat::free	(	void *	ptr,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Free the memory ptr on the default queue without synchronizing.

Parameters

ptr	Pointer to the memory to free.

Returns: no return value.

Definition at line 632 of file memory.hpp.

Referenced by syclcompat::device_memory< T, Memory, Dimension >::~device_memory(), and syclcompat::detail::host_buffer::~host_buffer().

◆ funnelshift_l()

unsigned int syclcompat::funnelshift_l	(	unsigned int	low,
		unsigned int	high,
		unsigned int	shift
	)

inline

Emulated function for __funnelshift_l.

Definition at line 384 of file math.hpp.

◆ funnelshift_lc()

unsigned int syclcompat::funnelshift_lc	(	unsigned int	low,
		unsigned int	high,
		unsigned int	shift
	)

inline

Emulated function for __funnelshift_lc.

Definition at line 390 of file math.hpp.

◆ funnelshift_r()

unsigned int syclcompat::funnelshift_r	(	unsigned int	low,
		unsigned int	high,
		unsigned int	shift
	)

inline

Emulated function for __funnelshift_r.

Definition at line 396 of file math.hpp.

◆ funnelshift_rc()

unsigned int syclcompat::funnelshift_rc	(	unsigned int	low,
		unsigned int	high,
		unsigned int	shift
	)

inline

Emulated function for __funnelshift_rc.

Definition at line 402 of file math.hpp.

◆ get_current_device()

static device_ext& syclcompat::get_current_device ( )

inlinestatic

Util function to get the current device.

Definition at line 900 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), and syclcompat::detail::dev_mgr::instance().

Referenced by syclcompat::experimental::calculate_max_active_wg_per_xecore(), syclcompat::experimental::calculate_max_potential_wg(), get_default_context(), and wait_and_free().

◆ get_current_device_id()

static unsigned int syclcompat::get_current_device_id ( )

inlinestatic

Util function to get the id of current device in device manager.

Definition at line 895 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::instance().

◆ get_default_context()

static sycl::context syclcompat::get_default_context ( )

inlinestatic

Util function to get the context of the default queue of current device in device manager.

Definition at line 911 of file device.hpp.

References syclcompat::device_ext::get_context(), and get_current_device().

◆ get_default_queue()

static sycl::queue syclcompat::get_default_queue ( )

inlinestatic

Util function to get the default queue of current device in device manager.

Definition at line 872 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::device_ext::default_queue(), and syclcompat::detail::dev_mgr::instance().

Referenced by launch(), and syclcompat::experimental::launch().

◆ get_device()

static device_ext& syclcompat::get_device ( unsigned int id )

inlinestatic

Util function to get a device by id.

Definition at line 905 of file device.hpp.

References syclcompat::detail::dev_mgr::get_device(), and syclcompat::detail::dev_mgr::instance().

Referenced by sycl::_V1::detail::queue_impl::get_backend_info< info::device::version >(), sycl::_V1::detail::queue_impl::get_backend_info< info::platform::version >(), sycl::_V1::detail::queue_impl::get_info< info::queue::device >(), and sycl::_V1::ext::oneapi::experimental::detail::image_mem_impl::~image_mem_impl().

◆ get_device_id()

static unsigned int syclcompat::get_device_id ( const sycl::device & dev )

inlinestatic

Definition at line 945 of file device.hpp.

References syclcompat::detail::dev_mgr::get_device_id(), and syclcompat::detail::dev_mgr::instance().

◆ get_kernel_function()

static kernel_function syclcompat::get_kernel_function	(	kernel_library &	library,
		const std::string &	name
	)

inlinestatic

Find kernel function in a kernel library and return its address.

Parameters

[in]	library	Handle to the kernel library.
[in]	name	Name of the kernel function.

Definition at line 435 of file kernel.hpp.

◆ get_kernel_function_info() [1/2]

static kernel_function_info syclcompat::get_kernel_function_info ( const void * function )

inlinestatic

Definition at line 75 of file kernel.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::kernel_function_info::max_work_group_size.

◆ get_kernel_function_info() [2/2]

static void syclcompat::get_kernel_function_info	(	kernel_function_info *	kernel_info,
		const void *	function
	)

inlinestatic

Definition at line 66 of file kernel.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::kernel_function_info::max_work_group_size.

◆ get_major_version()

static int syclcompat::get_major_version ( const sycl::device & dev )

static

Definition at line 325 of file device.hpp.

References syclcompat::detail::get_version().

Referenced by syclcompat::device_ext::get_major_version().

◆ get_minor_version()

static int syclcompat::get_minor_version ( const sycl::device & dev )

static

Definition at line 331 of file device.hpp.

References syclcompat::detail::get_version().

Referenced by syclcompat::device_ext::get_minor_version().

◆ get_sycl_language_version()

int syclcompat::get_sycl_language_version ( )

inline

Inherited from the original SYCLomatic compatibility headers.

Returns: compiler's SYCL version if defined, 202000 otherwise.

Definition at line 502 of file util.hpp.

◆ int_as_queue_ptr()

queue_ptr syclcompat::int_as_queue_ptr ( uintptr_t x )

inline

If x <= 2, then return a pointer to the default queue; otherwise, return x reinterpreted as a queue_ptr.

Definition at line 924 of file util.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::device_ext::default_queue(), and syclcompat::detail::dev_mgr::instance().

◆ invoke_kernel_function()

static void syclcompat::invoke_kernel_function	(	kernel_function &	function,
		sycl::queue &	queue,
		sycl::range< 3 >	group_range,
		sycl::range< 3 >	local_range,
		unsigned int	local_mem_size,
		void **	kernel_params,
		void **	extra
	)

inlinestatic

Invoke a kernel function.

Parameters

[in]	function	kernel function.
[in]	queue	SYCL queue used to execute kernel
[in]	group_range	SYCL group range
[in]	local_range	SYCL local range
[in]	local_mem_size	The size of local memory required by the kernel function.
[in]	kernel_params	Array of pointers to kernel arguments.
[in]	extra	Extra arguments.

Definition at line 459 of file kernel.hpp.

◆ isnan()

template<typename ValueT >

std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::isnan ( const ValueT a )

inline

Determines, element-wise, whether a 2-element value is NaN.

Parameters

[in] a The input value

Returns: the comparison result

Definition at line 683 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by syclcompat::detail::isnan().

◆ launch() [1/8]

template<auto F, typename... Args>

std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch	(	const dim3 &	grid,
		const dim3 &	threads,
		Args...	args
	)

Definition at line 136 of file launch.hpp.

References get_default_queue().

◆ launch() [2/8]

template<auto F, typename... Args>

sycl::event syclcompat::launch	(	const dim3 &	grid,
		const dim3 &	threads,
		size_t	mem_size,
		Args...	args
	)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue.

Template Parameters

F	SYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dim	nd_range dimension number.
Args	Types of the arguments to be passed to the kernel.

Parameters

grid	Grid dimensions represented with an (x, y, z) iteration space.
threads	Block dimensions represented with an (x, y, z) iteration space.
mem_size	The size, in number of bytes, of the local memory to be allocated.
args	The arguments to be passed to the kernel.

Returns: A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 218 of file launch.hpp.

References get_default_queue().

◆ launch() [3/8]

template<auto F, typename... Args>

sycl::event syclcompat::launch	(	const dim3 &	grid,
		const dim3 &	threads,
		size_t	mem_size,
		sycl::queue	q,
		Args...	args
	)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue.

Template Parameters

F	SYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dim	nd_range dimension number.
Args	Types of the arguments to be passed to the kernel.

Parameters

grid	Grid dimensions represented with an (x, y, z) iteration space.
threads	Block dimensions represented with an (x, y, z) iteration space.
mem_size	The size, in number of bytes, of the local memory to be allocated for kernel.
q	The SYCL queue on which to execute the kernel.
args	The arguments to be passed to the kernel.

Returns: A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 196 of file launch.hpp.

◆ launch() [4/8]

template<auto F, typename... Args>

std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch	(	const dim3 &	grid,
		const dim3 &	threads,
		sycl::queue	q,
		Args...	args
	)

Definition at line 130 of file launch.hpp.

◆ launch() [5/8]

template<auto F, int Dim, typename... Args>

std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch	(	const sycl::nd_range< Dim > &	range,
		Args...	args
	)

Definition at line 123 of file launch.hpp.

References get_default_queue().

◆ launch() [6/8]

template<auto F, int Dim, typename... Args>

sycl::event syclcompat::launch	(	const sycl::nd_range< Dim > &	range,
		size_t	mem_size,
		Args...	args
	)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue.

Template Parameters

F	SYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dim	nd_range dimension number.
Args	Types of the arguments to be passed to the kernel.

Parameters

range	Nd_range specifying the work group and global sizes for the kernel.
mem_size	The size, in number of bytes, of the local memory to be allocated for kernel.
args	The arguments to be passed to the kernel.

Returns: A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 175 of file launch.hpp.

References get_default_queue().

◆ launch() [7/8]

template<auto F, int Dim, typename... Args>

sycl::event syclcompat::launch	(	const sycl::nd_range< Dim > &	range,
		size_t	mem_size,
		sycl::queue	q,
		Args...	args
	)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue.

Template Parameters

F	SYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dim	nd_range dimension number.
Args	Types of the arguments to be passed to the kernel.

Parameters

range	Nd_range specifying the work group and global sizes for the kernel.
q	The SYCL queue on which to execute the kernel.
mem_size	The size, in number of bytes, of the local memory to be allocated for kernel.
args	The arguments to be passed to the kernel.

Returns: A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 155 of file launch.hpp.

◆ launch() [8/8]

template<auto F, int Dim, typename... Args>

std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch	(	const sycl::nd_range< Dim > &	range,
		sycl::queue	q,
		Args...	args
	)

Definition at line 117 of file launch.hpp.

◆ length()

template<typename ValueT >

ValueT syclcompat::length	(	const ValueT *	a,
		const int	len
	)

inline

Calculate the length of the input array, i.e. the square root of the sum of the squares of its elements.

Parameters

[in]	a	The array pointer
[in]	len	Length of the array

Returns: The square root

Definition at line 436 of file math.hpp.

References sycl::_V1::ext::intel::math::sqrt().

Referenced by syclcompat::device_info::set_name(), and sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::writeRegion().

◆ list_devices()

static void syclcompat::list_devices ( )

inlinestatic

List all the devices with their ids in dev_mgr.

Definition at line 929 of file device.hpp.

References syclcompat::detail::dev_mgr::instance(), and syclcompat::detail::dev_mgr::list_devices().

◆ load_kernel_library()

static kernel_library syclcompat::load_kernel_library ( const std::string & name )

inlinestatic

Load kernel library and return a handle to use the library.

Parameters

[in] name The name of the library.

Definition at line 386 of file kernel.hpp.

References syclcompat::detail::load_dl_from_data().

◆ load_kernel_library_mem()

static kernel_library syclcompat::load_kernel_library_mem ( char const *const image )

inlinestatic

Load kernel library whose image is already in memory and return a handle to use the library.

Parameters

[in] image A pointer to the image in memory.

Definition at line 400 of file kernel.hpp.

References syclcompat::detail::get_lib_size(), and syclcompat::detail::load_dl_from_data().

◆ local_mem()

template<typename AllocT >

auto* syclcompat::local_mem ( )

Definition at line 70 of file memory.hpp.

Referenced by syclcompat::detail::launch().

◆ malloc() [1/3]

static void* syclcompat::malloc	(	size_t &	pitch,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Allocate memory block for 2D array on the device.

Parameters

[out]	pitch	Aligned size of x in bytes.
	x	Range in dim x.
	y	Range in dim y.
	q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Definition at line 611 of file memory.hpp.

References syclcompat::detail::malloc().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::device_memory().

◆ malloc() [2/3]

static void* syclcompat::malloc	(	size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Allocate memory block on the device.

Parameters

num_bytes	Number of bytes to allocate.
q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Parameters

T	Datatype to allocate
count	Number of elements to allocate.
q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Definition at line 537 of file memory.hpp.

References syclcompat::detail::malloc().

◆ malloc() [3/3]

static pitched_data syclcompat::malloc	(	sycl::range< 3 >	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Allocate memory block for 3D array on the device.

Parameters

size	Size of the memory block, in bytes.
q	Queue to execute the allocate task.

Returns: A pitched_data object which stores the memory info.

Definition at line 595 of file memory.hpp.

References sycl::_V1::detail::array< dimensions >::get(), syclcompat::detail::malloc(), syclcompat::pitched_data::set_data_ptr(), and syclcompat::pitched_data::set_pitch().

◆ malloc_host()

static void* syclcompat::malloc_host	(	size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Allocate memory block on the host.

Parameters

num_bytes	Number of bytes to allocate.
q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Parameters

T	Datatype to allocate
num_bytes	Number of bytes to allocate.
q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Definition at line 556 of file memory.hpp.

◆ malloc_shared()

static void* syclcompat::malloc_shared	(	size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Allocate memory block of usm_shared memory.

Parameters

num_bytes	Number of bytes to allocate.
q	Queue to execute the allocate task.

Returns: A pointer to the newly allocated memory.

Definition at line 576 of file memory.hpp.

◆ match_all_over_sub_group()

template<typename T >

unsigned int syclcompat::match_all_over_sub_group	(	sycl::sub_group	g,
		unsigned	member_mask,
		T	value,
		int *	pred
	)

The function match_all_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_all_over_sub_group returns member_mask and sets the predicate pred to 1 if the values provided by all work-items in member_mask are equal; otherwise it returns 0 and sets the predicate pred to 0. The n-th bit of member_mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters

T	Input value type

Parameters

[in]	g	Input sub_group
[in]	member_mask	Input mask
[in]	value	Input value
[out]	pred	Output predicate

Returns: The result

Definition at line 561 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ match_any_over_sub_group()

template<typename T >

unsigned int syclcompat::match_any_over_sub_group	(	sycl::sub_group	g,
		unsigned	member_mask,
		T	value
	)

The function match_any_over_sub_group conducts a comparison of values across work-items within a sub-group.

match_any_over_sub_group returns a mask in which some bits are set to 1, indicating that the values provided by the work-items represented by these bits are equal. The n-th bit of the mask represents the work-item with id n. The parameter member_mask indicates the work-items participating in the call.

Template Parameters

T	Input value type

Parameters

[in]	g	Input sub_group
[in]	member_mask	Input mask
[in]	value	Input value

Returns: The result

Definition at line 522 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ max() [1/3]

sycl::half syclcompat::max	(	sycl::half	a,
		sycl::half	b
	)

inline

Definition at line 735 of file math.hpp.

Referenced by syclcompat::detail::extend_binary(), syclcompat::detail::extend_vbinary2(), and syclcompat::detail::extend_vbinary4().

◆ max() [2/3]

template<typename ValueT , typename ValueU >

std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max	(	ValueT	a,
		ValueU	b
	)

inline

Definition at line 723 of file math.hpp.

◆ max() [3/3]

template<typename ValueT , typename ValueU >

std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::max	(	ValueT	a,
		ValueU	b
	)

inline

Definition at line 731 of file math.hpp.

◆ memcpy() [1/4]

static void syclcompat::memcpy	(	pitched_data	to,
		sycl::id< 3 >	to_pos,
		pitched_data	from,
		sycl::id< 3 >	from_pos,
		sycl::range< 3 >	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Synchronously copies a subset of the 3D matrix specified by from to the 3D matrix specified by to.

The source and destination positions are specified by from_pos and to_pos. The size of the copied submatrix is specified by size.

Parameters

to	Destination matrix info.
to_pos	Position of destination.
from	Source matrix info.
from_pos	Position of source.
size	Range of the submatrix to be copied.
q	Queue to execute the copy task.

Returns: no return value.

Definition at line 782 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy() [2/4]

template<typename T >

static void syclcompat::memcpy	(	type_identity_t< T > *	to_ptr,
		const type_identity_t< T > *	from_ptr,
		size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

static

Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Template Parameters

T	Datatype to be copied.

Parameters

to_ptr	Pointer to destination memory address.
from_ptr	Pointer to source memory address.
count	Number of T to be copied.
q	Queue to execute the copy task.

Returns: no return value.

Definition at line 719 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::event::wait().

◆ memcpy() [3/4]

static void syclcompat::memcpy	(	void *	to_ptr,
		const void *	from_ptr,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

static

Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Parameters

to_ptr	Pointer to destination memory address.
from_ptr	Pointer to source memory address.
size	Number of bytes to be copied.
q	Queue to execute the copy task.

Returns: no return value.

Definition at line 671 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::event::wait().

◆ memcpy() [4/4]

static void syclcompat::memcpy	(	void *	to_ptr,
		size_t	to_pitch,
		const void *	from_ptr,
		size_t	from_pitch,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The function will return after the copy is completed.

Parameters

to_ptr	Pointer to destination memory address.
to_pitch	Range of dim x in bytes of destination matrix.
from_ptr	Pointer to source memory address.
from_pitch	Range of dim x in bytes of source matrix.
x	Range of dim x of matrix to be copied.
y	Range of dim y of matrix to be copied.
q	Queue to execute the copy task.

Returns: no return value.

Definition at line 741 of file memory.hpp.

References syclcompat::detail::memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy_async() [1/4]

static sycl::event syclcompat::memcpy_async	(	pitched_data	to,
		sycl::id< 3 >	to_pos,
		pitched_data	from,
		sycl::id< 3 >	from_pos,
		sycl::range< 3 >	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Asynchronously copies a subset of the 3D matrix specified by from to the 3D matrix specified by to.

The source and destination positions are specified by from_pos and to_pos. The size of the copied submatrix is specified by size. The return of the function does NOT guarantee the copy is completed.

Parameters

to	Destination matrix info.
to_pos	Position of destination.
from	Source matrix info.
from_pos	Position of source.
size	Range of the submatrix to be copied.
q	Queue to execute the copy task.

Returns: An event representing the memcpy operation.

Definition at line 801 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memcpy().

◆ memcpy_async() [2/4]

template<typename T >

static sycl::event syclcompat::memcpy_async	(	type_identity_t< T > *	to_ptr,
		const type_identity_t< T > *	from_ptr,
		size_t	count,
		sycl::queue	q = `get_default_queue()`
	)

static

Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Template Parameters

T	Datatype to be copied.

Parameters

to_ptr	Pointer to destination memory address.
from_ptr	Pointer to source memory address.
count	Number of T to be copied.
q	Queue to execute the copy task.

Returns: An event representing the memcpy operation.

Definition at line 702 of file memory.hpp.

References syclcompat::detail::memcpy().

◆ memcpy_async() [3/4]

static sycl::event syclcompat::memcpy_async	(	void *	to_ptr,
		const void *	from_ptr,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

static

Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters

to_ptr	Pointer to destination memory address.
from_ptr	Pointer to source memory address.
size	Number of bytes to be copied.
q	Queue to execute the copy task.

Returns: An event representing the memcpy operation.

Definition at line 685 of file memory.hpp.

References syclcompat::detail::memcpy().

◆ memcpy_async() [4/4]

static sycl::event syclcompat::memcpy_async	(	void *	to_ptr,
		size_t	to_pitch,
		const void *	from_ptr,
		size_t	from_pitch,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters

to_ptr	Pointer to destination memory address.
to_pitch	Range of dim x in bytes of destination matrix.
from_ptr	Pointer to source memory address.
from_pitch	Range of dim x in bytes of source matrix.
x	Range of dim x of matrix to be copied.
y	Range of dim y of matrix to be copied.
q	Queue to execute the copy task.

Returns: An event representing the memcpy operation.

Definition at line 762 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memcpy().

◆ memset() [1/3]

static void syclcompat::memset	(	pitched_data	pitch,
		int	val,
		sycl::range< 3 >	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets val to the 3D memory region specified by pitch in q.

size specifies the extent of the 3D memory region to set. The function will return after the memset operation is completed.

Parameters

pitch	Specify the 3D memory region.
val	Value to be set.
size	The size of the 3D memory region to set.
q	The queue in which the operation is done.

Returns: no return value.

Definition at line 1038 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset() [2/3]

static void syclcompat::memset	(	void *	dev_ptr,
		int	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

static

Synchronously sets value to the first size bytes starting from dev_ptr.

The function will return after the memset operation is completed.

Parameters

dev_ptr	Pointer to the device memory address.
value	Value to be set.
size	Number of bytes to be set to the value.
q	The queue in which the operation is done.

Returns: no return value.

Definition at line 875 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::event::wait().

◆ memset() [3/3]

static void syclcompat::memset	(	void *	ptr,
		size_t	pitch,
		int	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Definition at line 947 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_async() [1/3]

static sycl::event syclcompat::memset_async	(	pitched_data	pitch,
		int	val,
		sycl::range< 3 >	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets val to the 3D memory region specified by pitch in q.

size specifies the extent of the 3D memory region to set. The return of the function does NOT guarantee the memset operation is completed.

Parameters

pitch	Specify the 3D memory region.
val	Value to be set.
size	The size of the 3D memory region to set.
q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 1052 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_async() [2/3]

static sycl::event syclcompat::memset_async	(	void *	dev_ptr,
		int	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 1 byte data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters

dev_ptr	Pointer to the device memory address.
value	Value to be set.
size	Number of bytes to be set to the value.
q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 908 of file memory.hpp.

References syclcompat::detail::memset().

◆ memset_async() [3/3]

static sycl::event syclcompat::memset_async	(	void *	ptr,
		size_t	pitch,
		int	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 1 byte data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 989 of file memory.hpp.

References syclcompat::detail::combine_events().

◆ memset_d16() [1/2]

static void syclcompat::memset_d16	(	void *	dev_ptr,
		unsigned short	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters

[in]	dev_ptr	Pointer to the virtual device memory address.
[in]	value	The value to be set.
[in]	size	Number of elements to be set to the value.
[in]	q	The queue in which the operation is done.

Definition at line 886 of file memory.hpp.

◆ memset_d16() [2/2]

static void syclcompat::memset_d16	(	void *	ptr,
		size_t	pitch,
		unsigned short	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Definition at line 960 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d16_async() [1/2]

static sycl::event syclcompat::memset_d16_async	(	void *	dev_ptr,
		unsigned short	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 2 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters

[in]	dev_ptr	Pointer to the virtual device memory address.
[in]	value	The value to be set.
[in]	size	Number of elements to be set to the value.
[in]	q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 921 of file memory.hpp.

◆ memset_d16_async() [2/2]

static sycl::event syclcompat::memset_d16_async	(	void *	ptr,
		size_t	pitch,
		unsigned short	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 2 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 1007 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memset().

◆ memset_d32() [1/2]

static void syclcompat::memset_d32	(	void *	dev_ptr,
		unsigned int	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q synchronously.

Parameters

[in]	dev_ptr	Pointer to the virtual device memory address.
[in]	value	The value to be set.
[in]	size	Number of elements to be set to the value.
[in]	q	The queue in which the operation is done.

Definition at line 897 of file memory.hpp.

◆ memset_d32() [2/2]

static void syclcompat::memset_d32	(	void *	ptr,
		size_t	pitch,
		unsigned int	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q synchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Definition at line 974 of file memory.hpp.

References syclcompat::detail::memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_d32_async() [1/2]

static sycl::event syclcompat::memset_d32_async	(	void *	dev_ptr,
		unsigned int	value,
		size_t	size,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 4 bytes data value to the first size elements starting from dev_ptr in q asynchronously.

Parameters

[in]	dev_ptr	Pointer to the virtual device memory address.
[in]	value	The value to be set.
[in]	size	Number of elements to be set to the value.
[in]	q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 934 of file memory.hpp.

◆ memset_d32_async() [2/2]

static sycl::event syclcompat::memset_d32_async	(	void *	ptr,
		size_t	pitch,
		unsigned int	val,
		size_t	x,
		size_t	y,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Sets 4 bytes data val to the pitched 2D memory region pointed by ptr in q asynchronously.

Parameters

[in]	ptr	Pointer to the virtual device memory.
[in]	pitch	The pitch size by number of elements, including padding.
[in]	val	The value to be set.
[in]	x	The width of memory region by number of elements.
[in]	y	The height of memory region by number of elements.
[in]	q	The queue in which the operation is done.

Returns: An event representing the memset operation.

Definition at line 1023 of file memory.hpp.

References syclcompat::detail::combine_events(), and syclcompat::detail::memset().

◆ min() [1/3]

sycl::half syclcompat::min	(	sycl::half	a,
		sycl::half	b
	)

inline

Definition at line 717 of file math.hpp.

Referenced by syclcompat::detail::extend_binary(), syclcompat::detail::extend_vbinary2(), and syclcompat::detail::extend_vbinary4().

◆ min() [2/3]

template<typename ValueT , typename ValueU >

std::enable_if_t<std::is_integral_v<ValueT> && std::is_integral_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min	(	ValueT	a,
		ValueU	b
	)

inline

Definition at line 705 of file math.hpp.

◆ min() [3/3]

template<typename ValueT , typename ValueU >

std::enable_if_t<std::is_floating_point_v<ValueT> && std::is_floating_point_v<ValueU>, std::common_type_t<ValueT, ValueU> > syclcompat::min	(	ValueT	a,
		ValueU	b
	)

inline

Definition at line 713 of file math.hpp.

◆ operator*()

dim3 syclcompat::operator*	(	const dim3 &	a,
		const dim3 &	b
	)

inline

Definition at line 61 of file dims.hpp.

◆ operator+()

dim3 syclcompat::operator+	(	const dim3 &	a,
		const dim3 &	b
	)

inline

Definition at line 65 of file dims.hpp.

◆ operator-()

dim3 syclcompat::operator-	(	const dim3 &	a,
		const dim3 &	b
	)

inline

Definition at line 69 of file dims.hpp.

◆ permute_sub_group_by_xor()

template<typename T >

T syclcompat::permute_sub_group_by_xor	(	sycl::sub_group	g,
		T	x,
		unsigned int	mask,
		int	logical_sub_group_size = `32`
	)

permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is bitwise exclusive OR of the caller's id and mask. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters

T	Input value type

Parameters

[in]	g	Input sub_group
[in]	x	Input value
[in]	mask	Input mask
[in]	logical_sub_group_size	Input logical sub_group size

Returns: The result

Definition at line 298 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ pow() [1/4]

double syclcompat::pow	(	const double	a,
		const int	b
	)

inline

Definition at line 777 of file math.hpp.

◆ pow() [2/4]

float syclcompat::pow	(	const float	a,
		const int	b
	)

inline

Definition at line 776 of file math.hpp.

◆ pow() [3/4]

template<typename ValueT , typename ValueU >

std::enable_if_t<std::is_floating_point_v<ValueT>, ValueT> syclcompat::pow	(	const ValueT	a,
		const ValueU	b
	)

inline

Definition at line 781 of file math.hpp.

References pow().

◆ pow() [4/4]

template<typename ValueT , typename ValueU >

std::enable_if_t<!std::is_floating_point_v<ValueT>, double> syclcompat::pow	(	const ValueT	a,
		const ValueU	b
	)

inline

Definition at line 790 of file math.hpp.

Referenced by pow().

◆ relu() [1/3]

template<class ValueT >

std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::marray<ValueT, 2> > syclcompat::relu ( const sycl::marray< ValueT, 2 > a )

inline

Definition at line 817 of file math.hpp.

References relu().

◆ relu() [2/3]

template<class ValueT >

std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, sycl::vec<ValueT, 2> > syclcompat::relu ( const sycl::vec< ValueT, 2 > a )

inline

Definition at line 810 of file math.hpp.

References relu().

◆ relu() [3/3]

template<typename ValueT >

std::enable_if_t<std::is_floating_point_v<ValueT> || std::is_same_v<sycl::half, ValueT>, ValueT> syclcompat::relu ( const ValueT a )

inline

Performs relu saturation.

Parameters

[in] a The input value

Returns: the relu saturation result

Definition at line 801 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by relu().

◆ reverse_bits()

template<typename T >

T syclcompat::reverse_bits ( T a )

inline

Reverse the bit order of an unsigned integer.

Parameters

[in] a Input unsigned integer value

Returns: Value of a with the bit order reversed

Definition at line 161 of file util.hpp.

◆ select_device() [1/2]

template<class DeviceSelector >

static std::enable_if_t< std::is_invocable_r_v<int, DeviceSelector, const sycl::device &> > syclcompat::select_device ( const DeviceSelector & selector = sycl::gpu_selector_v )

inlinestatic

Definition at line 941 of file device.hpp.

References syclcompat::detail::dev_mgr::instance(), and syclcompat::detail::dev_mgr::select_device().

◆ select_device() [2/2]

static unsigned int syclcompat::select_device ( unsigned int id )

inlinestatic

Definition at line 933 of file device.hpp.

References syclcompat::detail::dev_mgr::instance(), and syclcompat::detail::dev_mgr::select_device().

◆ select_from_sub_group()

template<typename T >

T syclcompat::select_from_sub_group	(	sycl::sub_group	g,
		T	x,
		int	remote_local_id,
		int	logical_sub_group_size = `32`
	)

select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is remote_local_id. If remote_local_id is outside the logical sub_group id range, remote_local_id is taken modulo logical_sub_group_size. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters

T Input value type

Parameters

[in] g Input sub_group

[in] x Input value

[in] remote_local_id Input source work item id

[in] logical_sub_group_size Input logical sub_group size

Returns: The result

Definition at line 218 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ set_default_queue()

static void syclcompat::set_default_queue ( const sycl::queue & q )

inlinestatic

Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well.

This function will be blocking if there are submitted kernels in the previous default queue.

Parameters

q	New user-defined queue

Definition at line 883 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::device_ext::set_default_queue().

◆ shift_sub_group_left()

template<typename T >

T syclcompat::shift_sub_group_left	(	sycl::sub_group	g,
		T	x,
		unsigned int	delta,
		int	logical_sub_group_size = `32`
	)

shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is the caller's id plus delta. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters

T	Input value type

Parameters

[in]	g	Input sub_group
[in]	x	Input value
[in]	delta	Input delta
[in]	logical_sub_group_size	Input logical sub_group size

Returns: The result

Definition at line 242 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ shift_sub_group_right()

template<typename T >

T syclcompat::shift_sub_group_right	(	sycl::sub_group	g,
		T	x,
		unsigned int	delta,
		int	logical_sub_group_size = `32`
	)

shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is the caller's id minus delta. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters

T	Input value type

Parameters

[in]	g	Input sub_group
[in]	x	Input value
[in]	delta	Input delta
[in]	logical_sub_group_size	Input logical sub_group size

Returns: The result

Definition at line 270 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ unload_kernel_library()

static void syclcompat::unload_kernel_library ( const kernel_library & library )

inlinestatic

Unload kernel library.

Parameters

[in,out] library Handle to the library to be closed.

Definition at line 408 of file kernel.hpp.

◆ unordered_compare() [1/2]

template<typename ValueT , class BinaryOperation >

std::enable_if_t< std::is_same_v<std::invoke_result_t<BinaryOperation, ValueT, ValueT>, bool>, bool> syclcompat::unordered_compare	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs unordered comparison.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 495 of file math.hpp.

References syclcompat::detail::isnan().

Referenced by unordered_compare(), unordered_compare_both(), and unordered_compare_mask().

◆ unordered_compare() [2/2]

template<typename ValueT , class BinaryOperation >

std::enable_if_t<ValueT::size() == 2, ValueT> syclcompat::unordered_compare	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs 2 element unordered comparison.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 507 of file math.hpp.

References unordered_compare().

◆ unordered_compare_both()

template<typename ValueT , class BinaryOperation >

std::enable_if_t<ValueT::size() == 2, bool> syclcompat::unordered_compare_both	(	const ValueT	a,
		const ValueT	b,
		const BinaryOperation	binary_op
	)

inline

Performs 2 element unordered comparison and returns true if both results are true.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 532 of file math.hpp.

References unordered_compare().

◆ unordered_compare_mask()

template<typename ValueT , class BinaryOperation >

unsigned syclcompat::unordered_compare_mask	(	const sycl::vec< ValueT, 2 >	a,
		const sycl::vec< ValueT, 2 >	b,
		const BinaryOperation	binary_op
	)

inline

Performs a 2 element unordered comparison. The compare result of each element is 0 (false) or 0xffff (true); returns an unsigned int composed from the compare results of the two elements.

Parameters

[in]	a	The first value
[in]	b	The second value
[in]	binary_op	functor that implements the binary operation

Returns: the comparison result

Definition at line 562 of file math.hpp.

References unordered_compare().

◆ vectorized_binary()

template<typename VecT , class BinaryOperation >

unsigned syclcompat::vectorized_binary	(	unsigned	a,
		unsigned	b,
		const BinaryOperation	binary_op
	)

inline

Compute vectorized binary operation value for two values, with each value treated as a vector type VecT.

Template Parameters

[in]	VecT The type of the vector
[in]	BinaryOperation The binary operation class

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The vectorized binary operation value of the two values

Definition at line 984 of file math.hpp.

◆ vectorized_isgreater()

template<typename S , typename T >

T syclcompat::vectorized_isgreater	(	T	a,
		T	b
	)

inline

Compute vectorized isgreater for two values, with each value treated as a vector type S.

Parameters

[in]	S	The type of the vector
[in]	T	The type of the original values
[in]	a	The first value
[in]	b	The second value

Returns: The vectorized greater than of the two values

Definition at line 642 of file math.hpp.

◆ vectorized_isgreater< sycl::ushort2, unsigned >()

template<>

unsigned syclcompat::vectorized_isgreater< sycl::ushort2, unsigned >	(	unsigned	a,
		unsigned	b
	)

inline

Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short.

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The vectorized greater than of the two values

Definition at line 657 of file math.hpp.

◆ vectorized_max()

template<typename S , typename T >

T syclcompat::vectorized_max	(	T	a,
		T	b
	)

inline

Compute vectorized max for two values, with each value treated as a vector type S.

Parameters

[in]	S	The type of the vector
[in]	T	The type of the original values
[in]	a	The first value
[in]	b	The second value

Returns: The vectorized max of the two values

Definition at line 576 of file math.hpp.

◆ vectorized_min()

template<typename S , typename T >

T syclcompat::vectorized_min	(	T	a,
		T	b
	)

inline

Compute vectorized min for two values, with each value treated as a vector type S.

Parameters

[in]	S	The type of the vector
[in]	T	The type of the original values
[in]	a	The first value
[in]	b	The second value

Returns: The vectorized min of the two values

Definition at line 592 of file math.hpp.

◆ vectorized_sum_abs_diff()

template<typename VecT >

unsigned syclcompat::vectorized_sum_abs_diff	(	unsigned	a,
		unsigned	b
	)

inline

Compute vectorized absolute difference for two values without modulo overflow, with each value treated as a vector type VecT.

Template Parameters

[in]	VecT The type of the vector

Parameters

[in]	a	The first value
[in]	b	The second value

Returns: The vectorized absolute difference of the two values

Definition at line 623 of file math.hpp.

References sycl::_V1::ext::intel::esimd::detail::sum().

◆ vectorized_unary()

template<typename VecT , class UnaryOperation >

unsigned syclcompat::vectorized_unary	(	unsigned	a,
		const UnaryOperation	unary_op
	)

inline

Compute vectorized unary operation for a value, with the value treated as a vector type VecT.

Template Parameters

[in]	VecT The type of the vector
[in]	UnaryOperation The unary operation class

Parameters

[in] a The input value

Returns: The vectorized unary operation value of the input value

Definition at line 608 of file math.hpp.

◆ wait()

static void syclcompat::wait ( sycl::queue q = get_default_queue() )

inlinestatic

Definition at line 887 of file device.hpp.

Referenced by syclcompat::device_ext::~device_ext().

◆ wait_and_free()

static void syclcompat::wait_and_free	(	void *	ptr,
		sycl::queue	q = `get_default_queue()`
	)

inlinestatic

Wait on the queue q and free the memory ptr.

Parameters

ptr	Pointer to free.
q	Queue to execute the free task.

Returns: no return value.

Definition at line 620 of file memory.hpp.

References get_current_device(), syclcompat::device_ext::queues_wait_and_throw(), and sycl::_V1::queue::wait().

◆ wait_and_throw()

static void syclcompat::wait_and_throw ( sycl::queue q = get_default_queue() )

inlinestatic

Definition at line 889 of file device.hpp.

◆ wg_barrier()

void syclcompat::wg_barrier ( )

inline

Definition at line 32 of file id_query.hpp.

Namespaces

Classes

Typedefs

Enumerations

Functions

Typedef Documentation

◆ arith_t

◆ byte_t

◆ constant_memory

◆ device_ptr

◆ dot_product_acc_t

◆ err0

◆ err1

◆ event_ptr

◆ global_memory

◆ kernel_functor

◆ queue_ptr

◆ shared_memory

◆ type_identity_t

Enumeration Type Documentation

◆ error_code

◆ memory_region

◆ target

Function Documentation

◆ atomic_compare_exchange_strong() [1/2]

◆ atomic_compare_exchange_strong() [2/2]

◆ atomic_exchange()

◆ atomic_fetch_add()

◆ atomic_fetch_and()

◆ atomic_fetch_compare_dec()

◆ atomic_fetch_compare_inc()

◆ atomic_fetch_max()

◆ atomic_fetch_min()

◆ atomic_fetch_or()

◆ atomic_fetch_sub()

◆ atomic_fetch_xor()

◆ bfe_safe()

◆ bfi_safe()

◆ byte_level_permute()

◆ cabs()

◆ cast_double_to_int()

◆ cast_ints_to_double()

◆ cbrt()

◆ cdiv()

◆ clamp()

◆ cmul()

◆ cmul_add() [1/2]

◆ cmul_add() [2/2]

◆ compare() [1/3]

◆ compare() [2/3]

◆ compare() [3/3]

◆ compare_both()

◆ compare_mask()

◆ compute_nd_range() [1/2]

◆ compute_nd_range() [2/2]

◆ conj()

◆ cpu_device()

◆ create_queue()

◆ destroy_event()

◆ device_count()

◆ dp2a_hi()

◆ dp2a_lo()

◆ dp4a()

◆ enqueue_free()

◆ extend_absdiff() [1/2]

◆ extend_absdiff() [2/2]

◆ extend_absdiff_sat() [1/2]

◆ extend_absdiff_sat() [2/2]

◆ extend_add() [1/2]

◆ extend_add() [2/2]

◆ extend_add_sat() [1/2]

◆ extend_add_sat() [2/2]

◆ extend_max() [1/2]

◆ extend_max() [2/2]

◆ extend_max_sat() [1/2]

◆ extend_max_sat() [2/2]

◆ extend_min() [1/2]

◆ extend_min() [2/2]

◆ extend_min_sat() [1/2]

◆ extend_min_sat() [2/2]