Atomic extension to implement standard APIs in std::atomic. More...
Classes | |
struct | IsValidAtomicType |
class | dev_mgr |
device manager More... | |
class | vectorized_binary |
struct | shift_left |
struct | shift_right |
struct | average |
class | accessor |
class | memory_traits |
class | host_buffer |
class | generic_error_type |
struct | DataType |
struct | DataType< sycl::vec< T, 2 > > |
Typedefs | |
template<typename ValueT > | |
using | complex_type = detail::complex_namespace::complex< ValueT > |
Enumerations | |
enum class | pointer_access_attribute { host_only = 0 , device_only , host_device , end } |
Functions | |
static void | parse_version_string (const std::string &ver, int &major, int &minor) |
static void | get_version (const sycl::device &dev, int &major, int &minor) |
static unsigned int | get_tid () |
static fs::path | write_data_to_file (char const *const data, size_t size) |
Write data to temporary file and return absolute path to temporary file. More... | |
static uint16_t | extract16 (unsigned char const *const ptr) |
static uint32_t | extract32 (unsigned char const *const ptr) |
static uint64_t | extract64 (unsigned char const *const ptr) |
static uint64_t | get_lib_size (char const *const blob) |
static kernel_library | load_dl_from_data (char const *const data, size_t size) |
template<typename R , typename... Types> | |
constexpr size_t | getArgumentCount (R(*f)(Types...)) |
template<int Dim> | |
sycl::nd_range< 3 > | transform_nd_range (const sycl::nd_range< Dim > &range) |
template<auto F, typename... Args> | |
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > | launch (const sycl::nd_range< 3 > &range, sycl::queue q, Args... args) |
template<auto F, typename... Args> | |
sycl::event | launch (const sycl::nd_range< 3 > &range, size_t mem_size, sycl::queue q, Args... args) |
template<typename ValueT > | |
ValueT | clamp (ValueT val, ValueT min_val, ValueT max_val) |
template<typename ValueT > | |
auto | zero_or_signed_extend (ValueT val, unsigned bit) |
Extend the 'val' to 'bit' size, zero extend for unsigned int and signed extend for signed int. More... | |
template<typename RetT , bool needSat, typename AT , typename BT , typename BinaryOperation > | |
constexpr RetT | extend_binary (AT a, BT b, BinaryOperation binary_op) |
template<typename RetT , bool needSat, typename AT , typename BT , typename CT , typename BinaryOperation1 , typename BinaryOperation2 > | |
constexpr RetT | extend_binary (AT a, BT b, CT c, BinaryOperation1 binary_op, BinaryOperation2 second_op) |
template<typename T > | |
sycl::vec< int32_t, 2 > | extract_and_extend2 (T a) |
template<typename T > | |
sycl::vec< int16_t, 4 > | extract_and_extend4 (T a) |
template<typename RetT , bool NeedSat, bool NeedAdd, typename AT , typename BT , typename BinaryOperation > | |
constexpr RetT | extend_vbinary2 (AT a, BT b, RetT c, BinaryOperation binary_op) |
template<typename RetT , bool NeedSat, bool NeedAdd, typename AT , typename BT , typename BinaryOperation > | |
constexpr RetT | extend_vbinary4 (AT a, BT b, RetT c, BinaryOperation binary_op) |
template<typename ValueT > | |
bool | isnan (const ValueT a) |
template<typename T > | |
T | bfe (const T source, const uint32_t bit_start, const uint32_t num_bits) |
Bitfield-extract. More... | |
template<typename T > | |
T | bfi (const T x, const T y, const uint32_t bit_start, const uint32_t num_bits) |
Bitfield-insert. More... | |
template<typename T > | |
sycl::vec< T, 4 > | extract_and_sign_or_zero_extend4 (T val) |
template<typename T > | |
sycl::vec< T, 2 > | extract_and_sign_or_zero_extend2 (T val) |
static void * | malloc (size_t size, sycl::queue q) |
static constexpr size_t | get_pitch (size_t x) |
Calculate pitch (padded length of major dimension x) by rounding up to a multiple of 32. More... | |
static void * | malloc (size_t &pitch, size_t x, size_t y, size_t z, sycl::queue q) |
Malloc pitched 3D data. More... | |
template<class T > | |
static sycl::event | fill (sycl::queue q, void *dev_ptr, const T &pattern, size_t count) |
Set pattern to the first count elements of type T starting from dev_ptr . More... | |
static sycl::event | memset (sycl::queue q, void *dev_ptr, int value, size_t size) |
Set value to the first size bytes starting from dev_ptr in q . More... | |
template<typename T > | |
static std::vector< sycl::event > | memset (sycl::queue q, pitched_data data, const T &value, sycl::range< 3 > size) |
Sets value to the 3D memory region pointed by data in q . More... | |
template<typename T > | |
static std::vector< sycl::event > | memset (sycl::queue q, void *ptr, size_t pitch, const T &value, size_t x, size_t y) |
Sets value to the pitched 2D memory region pointed by ptr in q . More... | |
static pointer_access_attribute | get_pointer_attribute (sycl::queue q, const void *ptr) |
static experimental::memcpy_direction | deduce_memcpy_direction (sycl::queue q, void *to_ptr, const void *from_ptr) |
static sycl::event | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, size_t size, const std::vector< sycl::event > &dep_events={}) |
static size_t | get_copy_range (sycl::range< 3 > size, size_t slice, size_t pitch) |
static size_t | get_offset (sycl::id< 3 > id, size_t slice, size_t pitch) |
static std::vector< sycl::event > | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, sycl::range< 3 > to_range, sycl::range< 3 > from_range, sycl::id< 3 > to_id, sycl::id< 3 > from_id, sycl::range< 3 > size, const std::vector< sycl::event > &dep_events={}) |
copy 3D matrix specified by size from 3D matrix specified by from_ptr and from_range to another specified by to_ptr and to_range . More... | |
static std::vector< sycl::event > | memcpy (sycl::queue q, pitched_data to, sycl::id< 3 > to_id, pitched_data from, sycl::id< 3 > from_id, sycl::range< 3 > size) |
memcpy 2D/3D matrix specified by pitched_data. More... | |
static std::vector< sycl::event > | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, size_t to_pitch, size_t from_pitch, size_t x, size_t y) |
memcpy 2D matrix with pitch. More... | |
static sycl::event | combine_events (std::vector< sycl::event > &events, sycl::queue q) |
void | matrix_mem_copy (void *to_ptr, const void *from_ptr, int to_ld, int from_ld, int rows, int cols, int elem_size, sycl::queue queue=syclcompat::get_default_queue(), bool async=false) |
template<typename T > | |
void | matrix_mem_copy (T *to_ptr, const T *from_ptr, int to_ld, int from_ld, int rows, int cols, sycl::queue queue=get_default_queue(), bool async=false) |
Copy matrix data. More... | |
Variables | |
auto | exception_handler |
SYCL default exception handler. More... | |
template<typename T > | |
constexpr bool | is_int32_type |
Atomic extension to implement standard APIs in std::atomic.
using syclcompat::detail::complex_type = typedef detail::complex_namespace::complex<ValueT> |
|
strong |
Enumerator | |
---|---|
host_only | |
device_only | |
host_device | |
end |
Definition at line 281 of file memory.hpp.
|
inline |
Bitfield-extract.
T | The type of source value, must be an integer. |
source | The source value to extract from. |
bit_start | The position to start extracting. |
num_bits | The number of bits to extract. |
Definition at line 238 of file math.hpp.
Referenced by syclcompat::bfe_safe().
|
inline |
Bitfield-insert.
T | The type of x and y, must be an unsigned integer. |
x | The source of the bitfield. |
y | The source where bitfield is inserted. |
bit_start | The position to start insertion. |
num_bits | The number of bits to insert. |
Definition at line 330 of file math.hpp.
Referenced by syclcompat::bfi_safe().
|
inline |
Definition at line 50 of file math.hpp.
Referenced by syclcompat::clamp(), extend_vbinary2(), and extend_vbinary4().
|
static |
Definition at line 473 of file memory.hpp.
References sycl::_V1::handler::depends_on(), sycl::_V1::handler::host_task(), and sycl::_V1::queue::submit().
Referenced by syclcompat::memcpy_async(), syclcompat::memset_async(), syclcompat::memset_d16_async(), and syclcompat::memset_d32_async().
|
static |
Definition at line 302 of file memory.hpp.
References syclcompat::experimental::device_to_device, syclcompat::experimental::device_to_host, end, get_pointer_attribute(), syclcompat::experimental::host_to_device, and syclcompat::experimental::host_to_host.
|
inlineconstexpr |
Definition at line 120 of file math.hpp.
References syclcompat::max(), syclcompat::min(), and zero_or_signed_extend().
|
inlineconstexpr |
Definition at line 132 of file math.hpp.
References syclcompat::max(), syclcompat::min(), and zero_or_signed_extend().
|
inlineconstexpr |
Definition at line 171 of file math.hpp.
References clamp(), extract_and_extend2(), syclcompat::max(), and syclcompat::min().
|
inlineconstexpr |
Definition at line 195 of file math.hpp.
References clamp(), extract_and_extend4(), syclcompat::max(), and syclcompat::min().
|
inlinestatic |
Definition at line 191 of file kernel.hpp.
Referenced by get_lib_size().
|
inlinestatic |
Definition at line 200 of file kernel.hpp.
Referenced by get_lib_size().
|
inlinestatic |
Definition at line 211 of file kernel.hpp.
Referenced by get_lib_size().
sycl::vec<int32_t, 2> syclcompat::detail::extract_and_extend2 | ( | T | a | ) |
Definition at line 147 of file math.hpp.
References zero_or_signed_extend().
Referenced by extend_vbinary2().
sycl::vec<int16_t, 4> syclcompat::detail::extract_and_extend4 | ( | T | a | ) |
Definition at line 157 of file math.hpp.
References zero_or_signed_extend().
Referenced by extend_vbinary4().
sycl::vec<T, 2> syclcompat::detail::extract_and_sign_or_zero_extend2 | ( | T | val | ) |
Definition at line 1009 of file math.hpp.
Referenced by syclcompat::dp2a_hi(), and syclcompat::dp2a_lo().
sycl::vec<T, 4> syclcompat::detail::extract_and_sign_or_zero_extend4 | ( | T | val | ) |
Definition at line 1002 of file math.hpp.
Referenced by syclcompat::dp2a_hi(), syclcompat::dp2a_lo(), and syclcompat::dp4a().
|
inlinestatic |
Set pattern to the first count elements of type T starting from dev_ptr.
T | Datatype of the pattern to be set. |
q | The queue in which the operation is done. |
dev_ptr | Pointer to the device memory address. |
pattern | Pattern of type T to be set. |
count | Number of elements to be set to the pattern. |
Definition at line 222 of file memory.hpp.
References sycl::_V1::queue::fill().
Referenced by syclcompat::fill(), and syclcompat::fill_async().
|
inlinestatic |
Definition at line 324 of file memory.hpp.
References sycl::_V1::detail::array< dimensions >::get().
|
inlinestatic |
Definition at line 226 of file kernel.hpp.
References extract16(), extract32(), and extract64().
Referenced by syclcompat::load_kernel_library_mem().
|
inlinestatic |
Definition at line 329 of file memory.hpp.
|
inlinestaticconstexpr |
Calculate pitch (padded length of major dimension x) by rounding up to a multiple of 32.
x | The dimension to be padded (in bytes) |
Definition at line 195 of file memory.hpp.
Referenced by sycl::_V1::image< Dimensions, AllocatorT >::get_pitch(), and malloc().
|
static |
Definition at line 288 of file memory.hpp.
References device_only, sycl::_V1::queue::get_context(), host_device, and host_only.
Referenced by deduce_memcpy_direction().
|
inlinestatic |
Definition at line 690 of file device.hpp.
Referenced by syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::select_device().
|
static |
Definition at line 90 of file device.hpp.
References sycl::_V1::device::get_info(), and parse_version_string().
Referenced by syclcompat::get_major_version(), and syclcompat::get_minor_version().
|
constexpr |
Definition at line 40 of file launch.hpp.
Referenced by launch().
|
inline |
Definition at line 220 of file math.hpp.
References syclcompat::isnan().
Referenced by syclcompat::compare(), syclcompat::fmax_nan(), syclcompat::fmin_nan(), syclcompat::isnan(), syclcompat::relu(), and syclcompat::unordered_compare().
sycl::event syclcompat::detail::launch | ( | const sycl::nd_range< 3 > & | range, |
size_t | mem_size, | ||
sycl::queue | q, | ||
Args... | args | ||
) |
Definition at line 71 of file launch.hpp.
References getArgumentCount(), syclcompat::local_mem(), sycl::_V1::handler::parallel_for(), and sycl::_V1::queue::submit().
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::detail::launch | ( | const sycl::nd_range< 3 > & | range, |
sycl::queue | q, | ||
Args... | args | ||
) |
Definition at line 59 of file launch.hpp.
References getArgumentCount(), and sycl::_V1::queue::parallel_for().
|
inlinestatic |
Definition at line 357 of file kernel.hpp.
References write_data_to_file().
Referenced by syclcompat::load_kernel_library(), and syclcompat::load_kernel_library_mem().
|
inlinestatic |
Malloc pitched 3D data.
[out] | pitch | returns the calculated pitch (in bytes) |
[in] | x | width of the allocation (in bytes) |
[in] | y | height of the allocation |
[in] | z | depth of the allocation |
[in] | q | The queue in which the operation is done. |
Definition at line 206 of file memory.hpp.
References get_pitch(), and malloc().
|
inlinestatic |
Definition at line 187 of file memory.hpp.
References sycl::_V1::queue::get_context(), and sycl::_V1::queue::get_device().
Referenced by malloc(), and syclcompat::malloc().
|
inline |
Copy matrix data.
The default leading dimension is column.
[out] | to_ptr | A pointer to the destination location. |
[in] | from_ptr | A pointer to the source location. |
[in] | to_ld | The leading dimension of the destination matrix. |
[in] | from_ld | The leading dimension of the source matrix. |
[in] | rows | The number of rows of the source matrix. |
[in] | cols | The number of columns of the source matrix. |
[in] | queue | The queue where the routine should be executed. |
[in] | async | If this argument is true, the return of the function does NOT guarantee the copy is completed. |
Definition at line 123 of file util.hpp.
References matrix_mem_copy().
|
inline |
Definition at line 87 of file util.hpp.
References memcpy(), sycl::_V1::event::wait(), and sycl::_V1::ext::intel::experimental::esimd::wait().
Referenced by matrix_mem_copy().
|
inlinestatic |
memcpy 2D/3D matrix specified by pitched_data.
Definition at line 454 of file memory.hpp.
References syclcompat::pitched_data::get_data_ptr(), syclcompat::pitched_data::get_pitch(), syclcompat::pitched_data::get_y(), and memcpy().
|
static |
Definition at line 315 of file memory.hpp.
Referenced by syclcompat::device_memory< T, Memory, Dimension >::init(), matrix_mem_copy(), syclcompat::experimental::detail::memcpy(), memcpy(), syclcompat::memcpy(), and syclcompat::memcpy_async().
|
inlinestatic |
|
inlinestatic |
Copy 3D matrix specified by size from 3D matrix specified by from_ptr and from_range to another specified by to_ptr and to_range.
Definition at line 358 of file memory.hpp.
|
inlinestatic |
Sets value to the 3D memory region pointed by data in q.
T | The type of the element to be set. |
[in] | q | The queue in which the operation is done. |
[in] | data | Pointer to the pitched device memory region. |
[in] | value | The value to be set. |
[in] | size | 3D memory region by number of elements. |
Definition at line 248 of file memory.hpp.
References sycl::_V1::detail::array< dimensions >::get(), syclcompat::pitched_data::get_data_ptr(), syclcompat::pitched_data::get_pitch(), and syclcompat::pitched_data::get_y().
|
inlinestatic |
Set value to the first size bytes starting from dev_ptr in q.
q | The queue in which the operation is done. |
dev_ptr | Pointer to the device memory address. |
value | Value to be set. |
size | Number of bytes to be set to the value. |
Definition at line 234 of file memory.hpp.
References sycl::_V1::queue::memset().
Referenced by memset(), syclcompat::memset(), syclcompat::memset_async(), syclcompat::memset_d16(), syclcompat::memset_d16_async(), syclcompat::memset_d32(), and syclcompat::memset_d32_async().
|
inlinestatic |
Sets value to the pitched 2D memory region pointed by ptr in q.
T | The type of the element to be set. |
[in] | q | The queue in which the operation is done. |
[in] | ptr | Pointer to the virtual device memory. |
[in] | pitch | The pitch size by number of elements, including padding. |
[in] | value | The value to be set. |
[in] | x | The width of memory region by number of elements. |
[in] | y | The height of memory region by number of elements. |
Definition at line 274 of file memory.hpp.
References memset().
|
static |
Definition at line 62 of file device.hpp.
Referenced by get_version().
sycl::nd_range<3> syclcompat::detail::transform_nd_range | ( | const sycl::nd_range< Dim > & | range | ) |
Definition at line 45 of file launch.hpp.
References sycl::_V1::nd_range< Dimensions >::get_global_range(), and sycl::_V1::nd_range< Dimensions >::get_local_range().
Referenced by syclcompat::experimental::launch().
|
inlinestatic |
Write data to temporary file and return absolute path to temporary file.
The temporary file is created in a temporary directory; both have random names, and only the user has access permissions. Only one temporary file will be created in the temporary directory.
Definition at line 96 of file kernel.hpp.
Referenced by load_dl_from_data().
|
inline |
Extend the 'val' to 'bit' size, zero extend for unsigned int and signed extend for signed int.
Returns a signed integer type.
Definition at line 88 of file math.hpp.
Referenced by extend_binary(), extract_and_extend2(), and extract_and_extend4().
|
inline |
SYCL default exception handler.
Definition at line 96 of file device.hpp.