DPC++ Runtime
Runtime libraries for oneAPI DPC++
syclcompat Namespace Reference

Namespaces

 detail
 Atomic extension to implement standard APIs in std::atomic.
 
 experimental
 
 global_id
 
 global_range
 
 local_id
 
 local_range
 
 work_group_id
 
 work_group_range
 

Classes

class  atomic
 
class  device_info
 
class  device_ext
 device extension More...
 
class  dim3
 
struct  kernel_function_info
 
class  pitched_data
 Pitched 2D/3D memory data. More...
 
class  accessor
 accessor used as device function parameter. More...
 
class  accessor< T, Memory, 3 >
 
class  accessor< T, Memory, 2 >
 
class  device_memory
 Device variable with address space of shared or global. More...
 
class  device_memory< T, Memory, 0 >
 
class  pointer_attributes
 
struct  type_identity
 
struct  arith
 

Typedefs

using event_ptr = sycl::event *
 
using queue_ptr = sycl::queue *
 
typedef void(* kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)
 
using byte_t = uint8_t
 
template<class T , size_t Dimension>
using global_memory = device_memory< T, memory_region::global, Dimension >
 
template<class T , size_t Dimension>
using constant_memory = device_memory< T, memory_region::constant, Dimension >
 
template<class T , size_t Dimension>
using shared_memory = device_memory< T, memory_region::usm_shared, Dimension >
 
template<class T >
using type_identity_t = typename type_identity< T >::type
 
template<typename T >
using arith_t = typename arith< T >::type
 

Enumerations

enum class  memory_region { global = 0 , constant , local , usm_shared }
 
enum class  target { device , local }
 

Functions

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_add (T *addr, arith_t< T > operand)
 Atomically add the value operand to the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_sub (T *addr, arith_t< T > operand)
 Atomically subtract the value operand from the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_and (T *addr, type_identity_t< T > operand)
 Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_or (T *addr, type_identity_t< T > operand)
 Atomically or the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_xor (T *addr, type_identity_t< T > operand)
 Atomically xor the value at the addr with the value operand, and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_min (T *addr, type_identity_t< T > operand)
 Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_fetch_max (T *addr, type_identity_t< T > operand)
 Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_dec (unsigned int *addr, unsigned int operand)
 Atomically set operand to the value stored in addr, if old value stored in addr is equal to zero or greater than operand, else decrease the value stored in addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int atomic_fetch_compare_inc (unsigned int *addr, unsigned int operand)
 Atomically increment the value stored in addr if old value stored in addr is less than operand, else set 0 to the value stored in addr. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_exchange (T *addr, type_identity_t< T > operand)
 Atomically exchange the value at the address addr with the value operand. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (sycl::multi_ptr< T, sycl::access::address_space::generic_space > addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T atomic_compare_exchange_strong (T *addr, type_identity_t< T > expected, type_identity_t< T > desired, sycl::memory_order success=sycl::memory_order::relaxed, sycl::memory_order fail=sycl::memory_order::relaxed)
 Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected. More...
 
static void destroy_event (event_ptr event)
 Destroy event pointed memory. More...
 
sycl::queue create_queue (bool print_on_async_exceptions=false, bool in_order=true)
 
static sycl::queue get_default_queue ()
 Util function to get the default queue of current device in device manager. More...
 
static void set_default_queue (const sycl::queue &q)
 Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well. More...
 
void wait (sycl::queue q=get_default_queue())
 
static unsigned int get_current_device_id ()
 Util function to get the id of current device in device manager. More...
 
static device_ext & get_current_device ()
 Util function to get the current device. More...
 
static device_ext & get_device (unsigned int id)
 Util function to get a device by id. More...
 
static sycl::context get_default_context ()
 Util function to get the context of the default queue of current device in device manager. More...
 
static device_ext & cpu_device ()
 Util function to get a CPU device. More...
 
static unsigned int select_device (unsigned int id)
 
dim3 operator* (const dim3 &a, const dim3 &b)
 
dim3 operator+ (const dim3 &a, const dim3 &b)
 
dim3 operator- (const dim3 &a, const dim3 &b)
 
void wg_barrier ()
 
static void get_kernel_function_info (kernel_function_info *kernel_info, const void *function)
 
static kernel_function_info get_kernel_function_info (const void *function)
 
template<int Dim>
sycl::nd_range< Dim > compute_nd_range (sycl::range< Dim > global_size_in, sycl::range< Dim > work_group_size)
 
sycl::nd_range< 1 > compute_nd_range (int global_size_in, int work_group_size)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, sycl::queue q, Args... args)
 
template<auto F, int Dim, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const sycl::nd_range< Dim > &range, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args)
 
template<auto F, typename... Args>
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch (const dim3 &grid, const dim3 &threads, Args... args)
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue. More...
 
template<auto F, int Dim, typename... Args>
sycl::event launch (const sycl::nd_range< Dim > &range, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the default SYCL queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, sycl::queue q, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue. More...
 
template<auto F, typename... Args>
sycl::event launch (const dim3 &grid, const dim3 &threads, size_t mem_size, Args... args)
 Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue. More...
 
template<typename AllocT >
auto * local_mem ()
 
static void * malloc (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the device. More...
 
static void * malloc_host (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block on the host. More...
 
static void * malloc_shared (size_t num_bytes, sycl::queue q=get_default_queue())
 Allocate memory block of usm_shared memory. More...
 
static pitched_data malloc (sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Allocate memory block for 3D array on the device. More...
 
static void * malloc (size_t &pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Allocate memory block for 2D array on the device. More...
 
static void free (void *ptr, sycl::queue q=get_default_queue())
 free More...
 
sycl::event free_async (const std::vector< void * > &pointers, const std::vector< sycl::event > &events, sycl::queue q=get_default_queue())
 Frees the device memory pointed to by the batch of pointers in pointers, which are related to q, after events have completed. More...
 
static void memcpy (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, const void *from_ptr, size_t size, sycl::queue q=get_default_queue())
 Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static sycl::event memcpy_async (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
template<typename T >
static void memcpy (type_identity_t< T > *to_ptr, const type_identity_t< T > *from_ptr, size_t count, sycl::queue q=get_default_queue())
 Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr. More...
 
static void memcpy (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static sycl::event memcpy_async (void *to_ptr, size_t to_pitch, const void *from_ptr, size_t from_pitch, size_t x, size_t y, sycl::queue q=get_default_queue())
 Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr. More...
 
static void memcpy (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Synchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to. More...
 
static sycl::event memcpy_async (pitched_data to, sycl::id< 3 > to_pos, pitched_data from, sycl::id< 3 > from_pos, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Asynchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to. More...
 
template<class T >
static void fill (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Synchronously sets pattern to the first count elements starting from dev_ptr. More...
 
template<class T >
static sycl::event fill_async (void *dev_ptr, const T &pattern, size_t count, sycl::queue q=get_default_queue())
 Asynchronously sets pattern to the first count elements starting from dev_ptr. More...
 
static void memset (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Synchronously sets value to the first size bytes starting from dev_ptr. More...
 
static sycl::event memset_async (void *dev_ptr, int value, size_t size, sycl::queue q=get_default_queue())
 Asynchronously sets value to the first size bytes starting from dev_ptr. More...
 
static void memset (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets value to the 2D memory region pointed by ptr in q. More...
 
static sycl::event memset_async (void *ptr, size_t pitch, int val, size_t x, size_t y, sycl::queue q=get_default_queue())
 Sets value to the 2D memory region pointed by ptr in q. More...
 
static void memset (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets value to the 3D memory region specified by pitch in q. More...
 
static sycl::event memset_async (pitched_data pitch, int val, sycl::range< 3 > size, sycl::queue q=get_default_queue())
 Sets value to the 3D memory region specified by pitch in q. More...
 
int cast_double_to_int (double d, bool use_high32=true)
 Cast the high or low 32 bits of a double to an integer. More...
 
double cast_ints_to_double (int high32, int low32)
 Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double. More...
 
float fast_length (const float *a, int len)
 Compute fast_length for variable-length array. More...
 
template<typename S , typename T >
T vectorized_max (T a, T b)
 Compute vectorized max for two values, with each value treated as a vector type S. More...
 
template<typename S , typename T >
T vectorized_min (T a, T b)
 Compute vectorized min for two values, with each value treated as a vector type S. More...
 
template<typename S , typename T >
T vectorized_isgreater (T a, T b)
 Compute vectorized isgreater for two values, with each value treated as a vector type S. More...
 
template<>
unsigned vectorized_isgreater< sycl::ushort2, unsigned > (unsigned a, unsigned b)
 Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short. More...
 
template<typename T >
T reverse_bits (T a)
 Reverse the bit order of an unsigned integer. More...
 
unsigned int byte_level_permute (unsigned int a, unsigned int b, unsigned int s)
 
template<typename T >
int ffs (T a)
 Find position of first least significant set bit in an integer. More...
 
template<typename T >
T select_from_sub_group (sycl::sub_group g, T x, int remote_local_id, int logical_sub_group_size=32)
 select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group. More...
 
template<typename T >
T shift_sub_group_left (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_left moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left. More...
 
template<typename T >
T shift_sub_group_right (sycl::sub_group g, T x, unsigned int delta, int logical_sub_group_size=32)
 shift_sub_group_right moves values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right. More...
 
template<typename T >
T permute_sub_group_by_xor (sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size=32)
 permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask. More...
 
template<typename T >
sycl::vec< T, 2 > cmul (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the multiplication of two complex numbers. More...
 
template<typename T >
sycl::vec< T, 2 > cdiv (sycl::vec< T, 2 > x, sycl::vec< T, 2 > y)
 Computes the division of two complex numbers. More...
 
template<typename T >
T cabs (sycl::vec< T, 2 > x)
 Computes the magnitude of a complex number. More...
 
template<typename T >
sycl::vec< T, 2 > conj (sycl::vec< T, 2 > x)
 Computes the complex conjugate of a complex number. More...
 
int get_sycl_language_version ()
 Inherited from the original SYCLomatic compatibility headers. More...
 

Typedef Documentation

◆ arith_t

template<typename T >
using syclcompat::arith_t = typename arith<T>::type

Definition at line 42 of file traits.hpp.

◆ byte_t

using syclcompat::byte_t = uint8_t

Definition at line 90 of file memory.hpp.

◆ constant_memory

template<class T , size_t Dimension>
using syclcompat::constant_memory = device_memory<T, memory_region::constant, Dimension>

Definition at line 1034 of file memory.hpp.

◆ event_ptr

using syclcompat::event_ptr = sycl::event *

Definition at line 79 of file device.hpp.

◆ global_memory

template<class T , size_t Dimension>
using syclcompat::global_memory = device_memory<T, memory_region::global, Dimension>

Definition at line 1032 of file memory.hpp.

◆ kernel_functor

typedef void(* syclcompat::kernel_functor) (sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)

Definition at line 40 of file kernel.hpp.

◆ queue_ptr

using syclcompat::queue_ptr = sycl::queue *

Definition at line 81 of file device.hpp.

◆ shared_memory

template<class T , size_t Dimension>
using syclcompat::shared_memory = device_memory<T, memory_region::usm_shared, Dimension>

Definition at line 1036 of file memory.hpp.

◆ type_identity_t

template<class T >
using syclcompat::type_identity_t = typename type_identity<T>::type

Definition at line 35 of file traits.hpp.

Enumeration Type Documentation

◆ memory_region

enum syclcompat::memory_region
strong
Enumerator
global 
constant 
local 
usm_shared 

Definition at line 81 of file memory.hpp.

◆ target

enum syclcompat::target
strong
Enumerator
device 
local 

Definition at line 88 of file memory.hpp.

Function Documentation

◆ atomic_compare_exchange_strong() [1/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( sycl::multi_ptr< T, sycl::access::address_space::generic_space >  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in,out] addr: Multi_ptr.
expected: The value to compare against the value at addr.
desired: The value to assign to addr if the value at addr is expected.
success: The memory ordering used when comparison succeeds.
fail: The memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 253 of file atomic.hpp.

◆ atomic_compare_exchange_strong() [2/2]

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_compare_exchange_strong ( T *  addr,
type_identity_t< T >  expected,
type_identity_t< T >  desired,
sycl::memory_order  success = sycl::memory_order::relaxed,
sycl::memory_order  fail = sycl::memory_order::relaxed 
)

Atomically compare the value at addr to the value expected and exchange with the value desired if the value at addr is equal to the value expected.

Returns the value at the addr before the call.

Parameters
[in] addr: The pointer to the data.
expected: The value to compare against the value at addr.
desired: The value to assign to addr if the value at addr is expected.
success: The memory ordering used when comparison succeeds.
fail: The memory ordering used when comparison fails.
Returns
The value at the addr before the call.

Definition at line 279 of file atomic.hpp.

◆ atomic_exchange()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_exchange ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically exchange the value at the address addr with the value operand.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to be exchanged with the value pointed to by addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 232 of file atomic.hpp.

◆ atomic_fetch_add()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_add ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically add the value operand to the value at the addr and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to add to the value at addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 56 of file atomic.hpp.

◆ atomic_fetch_and()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_and ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically perform a bitwise AND between the value operand and the value at the addr and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to use in bitwise AND operation with the value at the addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 91 of file atomic.hpp.

◆ atomic_fetch_compare_dec()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_dec ( unsigned int *  addr,
unsigned int  operand 
)

Atomically set operand to the value stored in addr, if old value stored in addr is equal to zero or greater than operand, else decrease the value stored in addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The threshold value.
memoryOrder: The memory ordering used.
Returns
The old value stored in addr.

Definition at line 176 of file atomic.hpp.

◆ atomic_fetch_compare_inc()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device>
unsigned int syclcompat::atomic_fetch_compare_inc ( unsigned int *  addr,
unsigned int  operand 
)
inline

Atomically increment the value stored in addr if old value stored in addr is less than operand, else set 0 to the value stored in addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The threshold value.
memoryOrder: The memory ordering used.
Returns
The old value stored in addr.

Definition at line 205 of file atomic.hpp.

◆ atomic_fetch_max()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_max ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the maximum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to compare with the value at addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 160 of file atomic.hpp.

◆ atomic_fetch_min()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_min ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically calculate the minimum of the value at addr and the value operand and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to compare with the value at addr.

Definition at line 143 of file atomic.hpp.

◆ atomic_fetch_or()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_or ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically or the value at the addr with the value operand, and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to use in bitwise OR operation with the value at the addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 109 of file atomic.hpp.

◆ atomic_fetch_sub()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_sub ( T *  addr,
arith_t< T >  operand 
)
inline

Atomically subtract the value operand from the value at the addr and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to subtract from the value at addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 73 of file atomic.hpp.

◆ atomic_fetch_xor()

template<sycl::access::address_space addressSpace = sycl::access::address_space::generic_space, sycl::memory_order memoryOrder = sycl::memory_order::relaxed, sycl::memory_scope memoryScope = sycl::memory_scope::device, typename T >
T syclcompat::atomic_fetch_xor ( T *  addr,
type_identity_t< T >  operand 
)
inline

Atomically xor the value at the addr with the value operand, and assign the result to the value at addr.

Parameters
[in,out] addr: The pointer to the data.
operand: The value to use in bitwise XOR operation with the value at the addr.
memoryOrder: The memory ordering used.
Returns
The value at the addr before the call.

Definition at line 127 of file atomic.hpp.

◆ byte_level_permute()

unsigned int syclcompat::byte_level_permute ( unsigned int  a,
unsigned int  b,
unsigned int  s 
)
inline
Parameters
[in] a: The first value, which contains 4 bytes
[in] b: The second value, which contains 4 bytes
[in] s: The selector value; only the lower 16 bits are used
Returns
the permutation result of 4 bytes selected in the way specified by s from a and b

Definition at line 236 of file util.hpp.

◆ cabs()

template<typename T >
T syclcompat::cabs ( sycl::vec< T, 2 >  x)

Computes the magnitude of a complex number.

Template Parameters
T: Complex element type
Parameters
[in] x: The input complex number
Returns
The result

Definition at line 392 of file util.hpp.

◆ cast_double_to_int()

int syclcompat::cast_double_to_int ( double  d,
bool  use_high32 = true 
)
inline

Cast the high or low 32 bits of a double to an integer.

Parameters
[in] d: The double value.
[in] use_high32: Cast the high 32 bits of the double if true; otherwise cast the low 32 bits.

Definition at line 104 of file util.hpp.

◆ cast_ints_to_double()

double syclcompat::cast_ints_to_double ( int  high32,
int  low32 
)
inline

Combine two integers, the first as the high 32 bits and the second as the low 32 bits, into a double.

Parameters
[in] high32: The integer as the high 32 bits
[in] low32: The integer as the low 32 bits

Definition at line 116 of file util.hpp.

◆ cdiv()

template<typename T >
sycl::vec<T, 2> syclcompat::cdiv ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the division of two complex numbers.

Template Parameters
T: Complex element type
Parameters
[in] x: The first input complex number
[in] y: The second input complex number
Returns
The result

Definition at line 382 of file util.hpp.

◆ cmul()

template<typename T >
sycl::vec<T, 2> syclcompat::cmul ( sycl::vec< T, 2 >  x,
sycl::vec< T, 2 >  y 
)

Computes the multiplication of two complex numbers.

Template Parameters
T: Complex element type
Parameters
[in] x: The first input complex number
[in] y: The second input complex number
Returns
The result

Definition at line 370 of file util.hpp.

◆ compute_nd_range() [1/2]

sycl::nd_range<1> syclcompat::compute_nd_range ( int  global_size_in,
int  work_group_size 
)
inline

Definition at line 110 of file launch.hpp.

References sycl::_V1::ext::oneapi::experimental::work_group_size.

◆ compute_nd_range() [2/2]

template<int Dim>
sycl::nd_range<Dim> syclcompat::compute_nd_range ( sycl::range< Dim >  global_size_in,
sycl::range< Dim >  work_group_size 
)

◆ conj()

template<typename T >
sycl::vec<T, 2> syclcompat::conj ( sycl::vec< T, 2 >  x)

Computes the complex conjugate of a complex number.

Template Parameters
TComplex element type
Parameters
[in]xThe input complex number
Returns
The result

Definition at line 401 of file util.hpp.

◆ cpu_device()

static device_ext& syclcompat::cpu_device ( )
inlinestatic

Util function to get a CPU device.

Definition at line 565 of file device.hpp.

References syclcompat::detail::dev_mgr::cpu_device(), and syclcompat::detail::dev_mgr::instance().

◆ create_queue()

sycl::queue syclcompat::create_queue ( bool  print_on_async_exceptions = false,
bool  in_order = true 
)
inline

◆ destroy_event()

static void syclcompat::destroy_event ( event_ptr  event)
static

Destroys the memory pointed to by event.

Parameters
eventPointer to the sycl::event address.

Definition at line 86 of file device.hpp.

◆ fast_length()

float syclcompat::fast_length ( const float *  a,
int  len 
)
inline

Compute fast_length for variable-length array.

Parameters
[in]aThe array
[in]lenLength of the array
Returns
The computed fast_length

Definition at line 126 of file util.hpp.

◆ ffs()

template<typename T >
int syclcompat::ffs ( T  a)
inline

Find position of first least significant set bit in an integer.

ffs(0) returns 0.

Parameters
[in]aInput integer value
Returns
The position

Definition at line 252 of file util.hpp.

◆ fill()

template<class T >
static void syclcompat::fill ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously sets pattern to the first count elements starting from dev_ptr.

The function will return after the fill operation is completed.

Template Parameters
TDatatype of the value to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 697 of file memory.hpp.

Referenced by fill_async().

◆ fill_async()

template<class T >
static sycl::event syclcompat::fill_async ( void *  dev_ptr,
const T &  pattern,
size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously sets pattern to the first count elements starting from dev_ptr.

The return of the function does NOT guarantee the fill operation is completed.

Template Parameters
TDatatype of the pattern to be set.
Parameters
dev_ptrPointer to the device memory address.
patternPattern of type T to be set.
countNumber of elements to be set to the pattern.
qThe queue in which the operation is done.
Returns
A sycl::event that can be used to wait for the fill operation to complete.

Definition at line 714 of file memory.hpp.

References fill().

◆ free()

static void syclcompat::free ( void *  ptr,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Frees the memory pointed to by ptr.

Parameters
ptrPointer to the memory to free.
qQueue to execute the free task.
Returns
no return value.

Definition at line 513 of file memory.hpp.

References sycl::_V1::queue::get_context().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::~device_memory().

◆ free_async()

sycl::event syclcompat::free_async ( const std::vector< void * > &  pointers,
const std::vector< sycl::event > &  events,
sycl::queue  q = get_default_queue() 
)
inline

Frees the device memory referenced by each pointer in pointers, which are associated with q, after all events in events have completed.

Parameters
pointersThe pointers to the device memory to be freed.
eventsThe events to be waited.
qThe sycl::queue the memory relates to.

Definition at line 525 of file memory.hpp.

◆ get_current_device()

static device_ext& syclcompat::get_current_device ( )
inlinestatic

Util function to get the current device.

Definition at line 549 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), and syclcompat::detail::dev_mgr::instance().

Referenced by get_default_context().

◆ get_current_device_id()

static unsigned int syclcompat::get_current_device_id ( )
inlinestatic

Util function to get the id of current device in device manager.

Definition at line 544 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::instance().

◆ get_default_context()

static sycl::context syclcompat::get_default_context ( )
inlinestatic

Util function to get the context of the default queue of current device in device manager.

Definition at line 560 of file device.hpp.

References syclcompat::device_ext::get_context(), and get_current_device().

◆ get_default_queue()

static sycl::queue syclcompat::get_default_queue ( )
inlinestatic

Util function to get the default queue of current device in device manager.

Definition at line 525 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::device_ext::default_queue(), and syclcompat::detail::dev_mgr::instance().

Referenced by launch().

◆ get_device()

static device_ext& syclcompat::get_device ( unsigned int  id)
inlinestatic

◆ get_kernel_function_info() [1/2]

static kernel_function_info syclcompat::get_kernel_function_info ( const void *  function)
static

◆ get_kernel_function_info() [2/2]

static void syclcompat::get_kernel_function_info ( kernel_function_info kernel_info,
const void *  function 
)
static

◆ get_sycl_language_version()

int syclcompat::get_sycl_language_version ( )
inline

Inherited from the original SYCLomatic compatibility headers.

Returns
compiler's SYCL version if defined, 202000 otherwise.

Definition at line 409 of file util.hpp.

◆ launch() [1/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
Args...  args 
)

Definition at line 136 of file launch.hpp.

References get_default_queue().

◆ launch() [2/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using the default SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 218 of file launch.hpp.

References get_default_queue().

◆ launch() [3/8]

template<auto F, typename... Args>
sycl::event syclcompat::launch ( const dim3 grid,
const dim3 threads,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device with a user-specified grid and block dimensions following the standard of other programming models using a user-defined SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
gridGrid dimensions represented with an (x, y, z) iteration space.
threadsBlock dimensions represented with an (x, y, z) iteration space.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 196 of file launch.hpp.

◆ launch() [4/8]

template<auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const dim3 grid,
const dim3 threads,
sycl::queue  q,
Args...  args 
)

Definition at line 130 of file launch.hpp.

◆ launch() [5/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
Args...  args 
)

Definition at line 123 of file launch.hpp.

References get_default_queue().

◆ launch() [6/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range using the SYCL default queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 175 of file launch.hpp.

References get_default_queue().

◆ launch() [7/8]

template<auto F, int Dim, typename... Args>
sycl::event syclcompat::launch ( const sycl::nd_range< Dim > &  range,
size_t  mem_size,
sycl::queue  q,
Args...  args 
)

Launches a kernel with the templated F param and arguments on a device specified by the given nd_range and SYCL queue.

Template Parameters
FSYCL kernel to be executed, expects signature F(T* local_mem, Args... args).
Dimnd_range dimension number.
ArgsTypes of the arguments to be passed to the kernel.
Parameters
rangeNd_range specifying the work group and global sizes for the kernel.
qThe SYCL queue on which to execute the kernel.
mem_sizeThe size, in number of bytes, of the local memory to be allocated for kernel.
argsThe arguments to be passed to the kernel.
Returns
A SYCL event object that can be used to synchronize with the kernel's execution.

Definition at line 155 of file launch.hpp.

◆ launch() [8/8]

template<auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::launch ( const sycl::nd_range< Dim > &  range,
sycl::queue  q,
Args...  args 
)

Definition at line 117 of file launch.hpp.

◆ local_mem()

template<typename AllocT >
auto* syclcompat::local_mem ( )

Definition at line 63 of file memory.hpp.

Referenced by syclcompat::detail::launch().

◆ malloc() [1/3]

static void* syclcompat::malloc ( size_t &  pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 2D array on the device.

Parameters
[out]pitchAligned size of x in bytes.
xRange in dim x.
yRange in dim y.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 504 of file memory.hpp.

Referenced by syclcompat::device_memory< T, Memory, Dimension >::device_memory(), and malloc().

◆ malloc() [2/3]

static void* syclcompat::malloc ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the device.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
countNumber of elements to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 430 of file memory.hpp.

References malloc().

◆ malloc() [3/3]

static pitched_data syclcompat::malloc ( sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block for 3D array on the device.

Parameters
sizeSize of the memory block, in bytes.
qQueue to execute the allocate task.
Returns
A pitched_data object which stores the memory info.

Definition at line 488 of file memory.hpp.

References sycl::_V1::detail::array< dimensions >::get(), malloc(), syclcompat::pitched_data::set_data_ptr(), and syclcompat::pitched_data::set_pitch().

◆ malloc_host()

static void* syclcompat::malloc_host ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block on the host.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.
Parameters
TDatatype to allocate
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 449 of file memory.hpp.

◆ malloc_shared()

static void* syclcompat::malloc_shared ( size_t  count,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Allocate memory block of usm_shared memory.

Parameters
num_bytesNumber of bytes to allocate.
qQueue to execute the allocate task.
Returns
A pointer to the newly allocated memory.

Definition at line 469 of file memory.hpp.

◆ memcpy() [1/4]

static void syclcompat::memcpy ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 660 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

Referenced by syclcompat::device_memory< T, Memory, Dimension >::init(), memcpy(), and memcpy_async().

◆ memcpy() [2/4]

template<typename T >
static void syclcompat::memcpy ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 597 of file memory.hpp.

References memcpy().

◆ memcpy() [3/4]

static void syclcompat::memcpy ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 549 of file memory.hpp.

References memcpy().

Referenced by syclcompat::detail::memcpy().

◆ memcpy() [4/4]

static void syclcompat::memcpy ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Synchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The function will return after the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 619 of file memory.hpp.

References memcpy(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memcpy_async() [1/4]

static sycl::event syclcompat::memcpy_async ( pitched_data  to,
sycl::id< 3 >  to_pos,
pitched_data  from,
sycl::id< 3 >  from_pos,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies a subset of a 3D matrix specified by from to another 3D matrix specified by to.

The from and to position info are specified by from_pos and to_pos. The copied matrix size is specified by size. The return of the function does NOT guarantee the copy is completed.

Parameters
toDestination matrix info.
to_posPosition of destination.
fromSource matrix info.
from_posPosition of source.
sizeRange of the submatrix to be copied.
qQueue to execute the copy task.
Returns
A sycl::event that can be used to wait for the copy to complete.

Definition at line 679 of file memory.hpp.

References syclcompat::detail::combine_events(), and memcpy().

◆ memcpy_async() [2/4]

template<typename T >
static sycl::event syclcompat::memcpy_async ( type_identity_t< T > *  to_ptr,
const type_identity_t< T > *  from_ptr,
size_t  count,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies count T's from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Template Parameters
TDatatype to be copied.
Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
countNumber of T to be copied.
qQueue to execute the copy task.
Returns
A sycl::event that can be used to wait for the copy to complete.

Definition at line 580 of file memory.hpp.

References memcpy().

◆ memcpy_async() [3/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
const void *  from_ptr,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously copies size bytes from the address specified by from_ptr to the address specified by to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
from_ptrPointer to source memory address.
sizeNumber of bytes to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 563 of file memory.hpp.

References memcpy().

◆ memcpy_async() [4/4]

static sycl::event syclcompat::memcpy_async ( void *  to_ptr,
size_t  to_pitch,
const void *  from_ptr,
size_t  from_pitch,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously copies 2D matrix specified by x and y from the address specified by from_ptr to the address specified by to_ptr, while from_pitch and to_pitch are the range of dim x in bytes of the matrix specified by from_ptr and to_ptr.

The return of the function does NOT guarantee the copy is completed.

Parameters
to_ptrPointer to destination memory address.
to_pitchRange of dim x in bytes of destination matrix.
from_ptrPointer to source memory address.
from_pitchRange of dim x in bytes of source matrix.
xRange of dim x of matrix to be copied.
yRange of dim y of matrix to be copied.
qQueue to execute the copy task.
Returns
no return value.

Definition at line 640 of file memory.hpp.

References syclcompat::detail::combine_events(), and memcpy().

◆ memset() [1/3]

static void syclcompat::memset ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets val to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to set. The function will return after the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to set.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 791 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::wait().

Referenced by memset(), and memset_async().

◆ memset() [2/3]

static void syclcompat::memset ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Synchronously sets value to the first size bytes starting from dev_ptr.

The function will return after the memset operation is completed.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 728 of file memory.hpp.

References memset().

◆ memset() [3/3]

static void syclcompat::memset ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Sets val to the 2D memory region pointed to by ptr in q.

x and y specify the size of the 2D memory region to set. pitch is the size in bytes of the linear dimension, including padding bytes. The function will return after the memset operation is completed.

Parameters
ptrPointer to the device memory region.
pitchBytes in linear dimension, including padding bytes.
valValue to be set.
xThe memory size to set in the linear dimension.
yThe memory size to set in the second dimension.
qThe queue in which the operation is done.
Returns
no return value.

Definition at line 758 of file memory.hpp.

References memset(), and sycl::_V1::ext::intel::experimental::esimd::wait().

◆ memset_async() [1/3]

static sycl::event syclcompat::memset_async ( pitched_data  pitch,
int  val,
sycl::range< 3 >  size,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously sets val to the 3D memory region specified by pitch in q.

size specifies the size of the 3D memory region to set. The return of the function does NOT guarantee the memset operation is completed.

Parameters
pitchSpecify the 3D memory region.
valValue to be set.
sizeThe size of the 3D memory region to set.
qThe queue in which the operation is done.
Returns
A sycl::event that can be used to wait for the memset operation to complete.

Definition at line 805 of file memory.hpp.

References syclcompat::detail::combine_events(), and memset().

◆ memset_async() [2/3]

static sycl::event syclcompat::memset_async ( void *  dev_ptr,
int  value,
size_t  size,
sycl::queue  q = get_default_queue() 
)
static

Asynchronously sets value to the first size bytes starting from dev_ptr.

The return of the function does NOT guarantee the memset operation is completed.

Parameters
dev_ptrPointer to the device memory address.
valueValue to be set.
sizeNumber of bytes to be set to the value.
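qThe queue in which the operation is done.
Returns
A sycl::event that can be used to wait for the memset operation to complete.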

Definition at line 741 of file memory.hpp.

References memset().

◆ memset_async() [3/3]

static sycl::event syclcompat::memset_async ( void *  ptr,
size_t  pitch,
int  val,
size_t  x,
size_t  y,
sycl::queue  q = get_default_queue() 
)
inlinestatic

Asynchronously sets val to the 2D memory region pointed to by ptr in q.

x and y specify the size of the 2D memory region to set. pitch is the size in bytes of the linear dimension, including padding bytes. The return of the function does NOT guarantee the memset operation is completed.

Parameters
ptrPointer to the device memory region.
pitchBytes in linear dimension, including padding bytes.
valValue to be set.
xThe memory size to set in the linear dimension.
yThe memory size to set in the second dimension.
qThe queue in which the operation is done.
Returns
A sycl::event that can be used to wait for the memset operation to complete.

Definition at line 775 of file memory.hpp.

References syclcompat::detail::combine_events(), and memset().

◆ operator*()

dim3 syclcompat::operator* ( const dim3 a,
const dim3 b 
)
inline

Definition at line 60 of file dims.hpp.

◆ operator+()

dim3 syclcompat::operator+ ( const dim3 a,
const dim3 b 
)
inline

Definition at line 64 of file dims.hpp.

◆ operator-()

dim3 syclcompat::operator- ( const dim3 a,
const dim3 b 
)
inline

Definition at line 68 of file dims.hpp.

◆ permute_sub_group_by_xor()

template<typename T >
T syclcompat::permute_sub_group_by_xor ( sycl::sub_group  g,
T  x,
unsigned int  mask,
int  logical_sub_group_size = 32 
)

permute_sub_group_by_xor permutes values by exchanging values held by pairs of work-items identified by computing the bitwise exclusive OR of the work-item id and some fixed mask.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is bitwise exclusive OR of the caller's id and mask. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]maskInput mask
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 352 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ reverse_bits()

template<typename T >
T syclcompat::reverse_bits ( T  a)
inline

Reverse the bit order of an unsigned integer.

Parameters
[in]aInput unsigned integer value
Returns
Value of a with the bit order reversed

Definition at line 215 of file util.hpp.

◆ select_device()

static unsigned int syclcompat::select_device ( unsigned int  id)
inlinestatic

◆ select_from_sub_group()

template<typename T >
T syclcompat::select_from_sub_group ( sycl::sub_group  g,
T  x,
int  remote_local_id,
int  logical_sub_group_size = 32 
)

select_from_sub_group allows work-items to obtain a copy of a value held by any other work-item in the sub_group.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is remote_local_id. If remote_local_id is outside the logical sub_group id range, remote_local_id is taken modulo logical_sub_group_size. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]remote_local_idInput source work item id
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 272 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ set_default_queue()

static void syclcompat::set_default_queue ( const sycl::queue q)
inlinestatic

Util function to change the default queue of the current device in the device manager. If the device extension saved queue is the default queue, the previous saved queue will be overwritten as well.

This function will be blocking if there are submitted kernels in the previous default queue.

Parameters
qNew user-defined queue

Definition at line 536 of file device.hpp.

References syclcompat::detail::dev_mgr::current_device(), syclcompat::detail::dev_mgr::instance(), and syclcompat::device_ext::set_default_queue().

◆ shift_sub_group_left()

template<typename T >
T syclcompat::shift_sub_group_left ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_left move values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the left.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is the caller's id plus delta. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]deltaInput delta
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 296 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ shift_sub_group_right()

template<typename T >
T syclcompat::shift_sub_group_right ( sycl::sub_group  g,
T  x,
unsigned int  delta,
int  logical_sub_group_size = 32 
)

shift_sub_group_right move values held by the work-items in a sub_group directly to another work-item in the sub_group, by shifting values a fixed number of work-items to the right.

The input sub_group will be divided into several logical sub_groups with id range [0, logical_sub_group_size - 1]. Each work-item in logical sub_group gets value from another work-item whose id is the caller's id minus delta. If calculated id is outside the logical sub_group id range, the work-item will get value from itself. The logical_sub_group_size must be a power of 2 and not exceed input sub_group size.

Template Parameters
TInput value type
Parameters
[in]gInput sub_group
[in]xInput value
[in]deltaInput delta
[in]logical_sub_group_sizeInput logical sub_group size
Returns
The result

Definition at line 324 of file util.hpp.

References sycl::_V1::sub_group::get_local_linear_id().

◆ vectorized_isgreater()

template<typename S , typename T >
T syclcompat::vectorized_isgreater ( T  a,
T  b 
)
inline

Compute vectorized isgreater for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized greater than of the two values

Definition at line 185 of file util.hpp.

◆ vectorized_isgreater< sycl::ushort2, unsigned >()

template<>
unsigned syclcompat::vectorized_isgreater< sycl::ushort2, unsigned > ( unsigned  a,
unsigned  b 
)
inline

Compute vectorized isgreater for two unsigned int values, with each value treated as a vector of two unsigned short.

Parameters
[in]aThe first value
[in]bThe second value
Returns
The vectorized greater than of the two values

Definition at line 200 of file util.hpp.

◆ vectorized_max()

template<typename S , typename T >
T syclcompat::vectorized_max ( T  a,
T  b 
)
inline

Compute vectorized max for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized max of the two values

Definition at line 153 of file util.hpp.

◆ vectorized_min()

template<typename S , typename T >
T syclcompat::vectorized_min ( T  a,
T  b 
)
inline

Compute vectorized min for two values, with each value treated as a vector type S.

Parameters
[in]SThe type of the vector
[in]TThe type of the original values
[in]aThe first value
[in]bThe second value
Returns
The vectorized min of the two values

Definition at line 169 of file util.hpp.

◆ wait()

void syclcompat::wait ( sycl::queue  q = get_default_queue())
inline

Definition at line 540 of file device.hpp.

Referenced by syclcompat::device_ext::~device_ext().

◆ wg_barrier()

void syclcompat::wg_barrier ( )
inline

Definition at line 31 of file id_query.hpp.