Atomic extension to implement standard APIs in std::atomic. More...
Classes | |
struct | IsValidAtomicType |
class | dev_mgr |
device manager More... | |
class | vectorized_binary |
class | accessor |
class | memory_traits |
class | generic_error_type |
struct | DataType |
struct | DataType< sycl::vec< T, 2 > > |
Typedefs | |
template<typename ValueT > | |
using | complex_type = detail::complex_namespace::complex< ValueT > |
Enumerations | |
enum | memcpy_direction { host_to_host , host_to_device , device_to_host , device_to_device , automatic } |
enum class | pointer_access_attribute { host_only = 0 , device_only , host_device , end } |
Functions | |
static unsigned int | get_tid () |
static fs::path | write_data_to_file (char const *const data, size_t size) |
Write data to temporary file and return absolute path to temporary file. More... | |
static uint16_t | extract16 (unsigned char const *const ptr) |
static uint32_t | extract32 (unsigned char const *const ptr) |
static uint64_t | extract64 (unsigned char const *const ptr) |
static uint64_t | get_lib_size (char const *const blob) |
static kernel_library | load_dl_from_data (char const *const data, size_t size) |
template<typename R , typename... Types> | |
constexpr size_t | getArgumentCount (R(*f)(Types...)) |
template<int Dim> | |
sycl::nd_range< 3 > | transform_nd_range (const sycl::nd_range< Dim > &range) |
template<auto F, typename... Args> | |
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > | launch (const sycl::nd_range< 3 > &range, sycl::queue q, Args... args) |
template<auto F, typename... Args> | |
sycl::event | launch (const sycl::nd_range< 3 > &range, size_t mem_size, sycl::queue q, Args... args) |
template<typename ValueT > | |
ValueT | clamp (ValueT val, ValueT min_val, ValueT max_val) |
template<typename ValueT > | |
int64_t | zero_or_signed_extent (ValueT val, unsigned bit) |
Extend 'val' to 'bit' bits: zero-extend for unsigned integers and sign-extend for signed integers. More... | |
template<typename RetT , bool needSat, typename AT , typename BT , typename BinaryOperation > | |
constexpr RetT | extend_binary (AT a, BT b, BinaryOperation binary_op) |
template<typename RetT , bool needSat, typename AT , typename BT , typename CT , typename BinaryOperation1 , typename BinaryOperation2 > | |
constexpr RetT | extend_binary (AT a, BT b, CT c, BinaryOperation1 binary_op, BinaryOperation2 second_op) |
template<typename ValueT > | |
bool | isnan (const ValueT a) |
static void * | malloc (size_t size, sycl::queue q) |
static constexpr size_t | get_pitch (size_t x) |
Calculate pitch (padded length of major dimension x) by rounding up to a multiple of 32. More... | |
static void * | malloc (size_t &pitch, size_t x, size_t y, size_t z, sycl::queue q) |
Malloc pitched 3D data. More... | |
template<class T > | |
static sycl::event | fill (sycl::queue q, void *dev_ptr, const T &pattern, size_t count) |
Set pattern to the first count elements of type T starting from dev_ptr. More... | |
static sycl::event | memset (sycl::queue q, void *dev_ptr, int value, size_t size) |
Set value to the first size bytes starting from dev_ptr in q. More... | |
template<typename T > | |
static std::vector< sycl::event > | memset (sycl::queue q, pitched_data data, const T &value, sycl::range< 3 > size) |
Sets value to the 3D memory region pointed to by data in q. More... | |
template<typename T > | |
static std::vector< sycl::event > | memset (sycl::queue q, void *ptr, size_t pitch, const T &value, size_t x, size_t y) |
Sets value to the pitched 2D memory region pointed to by ptr in q. More... | |
static pointer_access_attribute | get_pointer_attribute (sycl::queue q, const void *ptr) |
static memcpy_direction | deduce_memcpy_direction (sycl::queue q, void *to_ptr, const void *from_ptr) |
static sycl::event | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, size_t size, const std::vector< sycl::event > &dep_events={}) |
static size_t | get_copy_range (sycl::range< 3 > size, size_t slice, size_t pitch) |
static size_t | get_offset (sycl::id< 3 > id, size_t slice, size_t pitch) |
static std::vector< sycl::event > | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, sycl::range< 3 > to_range, sycl::range< 3 > from_range, sycl::id< 3 > to_id, sycl::id< 3 > from_id, sycl::range< 3 > size, const std::vector< sycl::event > &dep_events={}) |
Copy the 3D matrix region specified by size from the 3D matrix specified by from_ptr and from_range to another specified by to_ptr and to_range. More... | |
static std::vector< sycl::event > | memcpy (sycl::queue q, pitched_data to, sycl::id< 3 > to_id, pitched_data from, sycl::id< 3 > from_id, sycl::range< 3 > size) |
memcpy 2D/3D matrix specified by pitched_data. More... | |
static std::vector< sycl::event > | memcpy (sycl::queue q, void *to_ptr, const void *from_ptr, size_t to_pitch, size_t from_pitch, size_t x, size_t y) |
memcpy 2D matrix with pitch. More... | |
static sycl::event | combine_events (std::vector< sycl::event > &events, sycl::queue q) |
void | matrix_mem_copy (void *to_ptr, const void *from_ptr, int to_ld, int from_ld, int rows, int cols, int elem_size, sycl::queue queue=syclcompat::get_default_queue(), bool async=false) |
template<typename T > | |
void | matrix_mem_copy (T *to_ptr, const T *from_ptr, int to_ld, int from_ld, int rows, int cols, sycl::queue queue=get_default_queue(), bool async=false) |
Copy matrix data. More... | |
Variables | |
auto | exception_handler |
SYCL default exception handler. More... | |
Atomic extension to implement standard APIs in std::atomic.
using syclcompat::detail::complex_type = detail::complex_namespace::complex<ValueT> |
Enumerator | |
---|---|
host_to_host | |
host_to_device | |
device_to_host | |
device_to_device | |
automatic |
Definition at line 79 of file memory.hpp.
|
strong |
Enumerator | |
---|---|
host_only | |
device_only | |
host_device | |
end |
Definition at line 244 of file memory.hpp.
|
inline |
Definition at line 51 of file math.hpp.
Referenced by syclcompat::clamp().
|
static |
Definition at line 439 of file memory.hpp.
References sycl::_V1::handler::depends_on(), sycl::_V1::handler::host_task(), and sycl::_V1::queue::submit().
Referenced by syclcompat::memcpy_async(), syclcompat::memset_async(), syclcompat::memset_d16_async(), and syclcompat::memset_d32_async().
|
static |
Definition at line 264 of file memory.hpp.
References device_to_device, device_to_host, end, get_pointer_attribute(), host_to_device, and host_to_host.
|
inlineconstexpr |
Definition at line 94 of file math.hpp.
References syclcompat::max(), syclcompat::min(), and zero_or_signed_extent().
|
inlineconstexpr |
Definition at line 106 of file math.hpp.
References syclcompat::max(), syclcompat::min(), and zero_or_signed_extent().
|
inlinestatic |
Definition at line 191 of file kernel.hpp.
Referenced by get_lib_size().
|
inlinestatic |
Definition at line 200 of file kernel.hpp.
Referenced by get_lib_size().
|
inlinestatic |
Definition at line 211 of file kernel.hpp.
Referenced by get_lib_size().
|
inlinestatic |
Set pattern to the first count elements of type T starting from dev_ptr.
T | Datatype of the pattern to be set. |
q | The queue in which the operation is done. |
dev_ptr | Pointer to the device memory address. |
pattern | Pattern of type T to be set. |
count | Number of elements to be set to the pattern. |
Definition at line 185 of file memory.hpp.
References sycl::_V1::queue::fill().
|
inlinestatic |
Definition at line 292 of file memory.hpp.
References sycl::_V1::detail::array< dimensions >::get().
|
inlinestatic |
Definition at line 226 of file kernel.hpp.
References extract16(), extract32(), and extract64().
Referenced by syclcompat::load_kernel_library_mem().
|
inlinestatic |
Definition at line 297 of file memory.hpp.
|
inlinestaticconstexpr |
Calculate pitch (padded length of major dimension x) by rounding up to a multiple of 32.
x | The dimension to be padded (in bytes) |
Definition at line 158 of file memory.hpp.
Referenced by sycl::_V1::image< Dimensions, AllocatorT >::get_pitch(), and malloc().
|
static |
Definition at line 251 of file memory.hpp.
References device_only, sycl::_V1::queue::get_context(), host_device, and host_only.
Referenced by deduce_memcpy_direction().
|
inlinestatic |
Definition at line 615 of file device.hpp.
Referenced by syclcompat::detail::dev_mgr::current_device_id(), and syclcompat::detail::dev_mgr::select_device().
|
constexpr |
Definition at line 40 of file launch.hpp.
Referenced by launch().
|
inline |
Definition at line 121 of file math.hpp.
References syclcompat::isnan().
Referenced by syclcompat::compare(), syclcompat::fmax_nan(), syclcompat::fmin_nan(), syclcompat::isnan(), syclcompat::relu(), and syclcompat::unordered_compare().
sycl::event syclcompat::detail::launch | ( | const sycl::nd_range< 3 > & | range, |
size_t | mem_size, | ||
sycl::queue | q, | ||
Args... | args | ||
) |
Definition at line 71 of file launch.hpp.
References getArgumentCount(), syclcompat::local_mem(), sycl::_V1::handler::parallel_for(), and sycl::_V1::queue::submit().
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event> syclcompat::detail::launch | ( | const sycl::nd_range< 3 > & | range, |
sycl::queue | q, | ||
Args... | args | ||
) |
Definition at line 59 of file launch.hpp.
References getArgumentCount(), and sycl::_V1::queue::parallel_for().
|
inlinestatic |
Definition at line 357 of file kernel.hpp.
References write_data_to_file().
Referenced by syclcompat::load_kernel_library(), and syclcompat::load_kernel_library_mem().
|
inlinestatic |
Malloc pitched 3D data.
[out] | pitch | returns the calculated pitch (in bytes) |
[in] | x | width of the allocation (in bytes) |
[in] | y | height of the allocation |
[in] | z | depth of the allocation |
[in] | q | The queue in which the operation is done. |
Definition at line 169 of file memory.hpp.
References get_pitch(), and malloc().
|
inlinestatic |
Definition at line 150 of file memory.hpp.
References sycl::_V1::queue::get_context(), and sycl::_V1::queue::get_device().
Referenced by malloc().
|
inline |
Copy matrix data.
The default leading dimension is column.
[out] | to_ptr | A pointer to the destination location. |
[in] | from_ptr | A pointer to the source location. |
[in] | to_ld | The leading dimension of the destination matrix. |
[in] | from_ld | The leading dimension of the source matrix. |
[in] | rows | The number of rows of the source matrix. |
[in] | cols | The number of columns of the source matrix. |
[in] | queue | The queue where the routine should be executed. |
[in] | async | If this argument is true, the return of the function does NOT guarantee the copy is completed. |
Definition at line 122 of file util.hpp.
References matrix_mem_copy().
|
inline |
Definition at line 86 of file util.hpp.
References memcpy(), sycl::_V1::event::wait(), and sycl::_V1::ext::intel::experimental::esimd::wait().
Referenced by matrix_mem_copy().
|
inlinestatic |
memcpy 2D/3D matrix specified by pitched_data.
Definition at line 420 of file memory.hpp.
References syclcompat::pitched_data::get_data_ptr(), syclcompat::pitched_data::get_pitch(), syclcompat::pitched_data::get_y(), and syclcompat::memcpy().
|
static |
Definition at line 283 of file memory.hpp.
Referenced by matrix_mem_copy().
|
inlinestatic |
memcpy 2D matrix with pitch.
Definition at line 430 of file memory.hpp.
References syclcompat::memcpy().
|
inlinestatic |
Copy the 3D matrix region specified by size from the 3D matrix specified by from_ptr and from_range to another specified by to_ptr and to_range.
Definition at line 304 of file memory.hpp.
|
inlinestatic |
Sets value to the 3D memory region pointed to by data in q.
T | The type of the element to be set. |
[in] | q | The queue in which the operation is done. |
[in] | data | Pointer to the pitched device memory region. |
[in] | value | The value to be set. |
[in] | size | 3D memory region by number of elements. |
Definition at line 211 of file memory.hpp.
References sycl::_V1::detail::array< dimensions >::get(), syclcompat::pitched_data::get_data_ptr(), syclcompat::pitched_data::get_pitch(), and syclcompat::pitched_data::get_y().
|
inlinestatic |
Set value to the first size bytes starting from dev_ptr in q.
q | The queue in which the operation is done. |
dev_ptr | Pointer to the device memory address. |
value | Value to be set. |
size | Number of bytes to be set to the value. |
Definition at line 197 of file memory.hpp.
References sycl::_V1::queue::memset().
Referenced by memset().
|
inlinestatic |
Sets value to the pitched 2D memory region pointed to by ptr in q.
T | The type of the element to be set. |
[in] | q | The queue in which the operation is done. |
[in] | ptr | Pointer to the virtual device memory. |
[in] | pitch | The pitch size by number of elements, including padding. |
[in] | value | The value to be set. |
[in] | x | The width of memory region by number of elements. |
[in] | y | The height of memory region by number of elements. |
Definition at line 237 of file memory.hpp.
References memset().
sycl::nd_range<3> syclcompat::detail::transform_nd_range | ( | const sycl::nd_range< Dim > & | range | ) |
Definition at line 45 of file launch.hpp.
References sycl::_V1::nd_range< Dimensions >::get_global_range(), and sycl::_V1::nd_range< Dimensions >::get_local_range().
|
inlinestatic |
Write data to temporary file and return absolute path to temporary file.
The temporary file is created inside a temporary directory; both have random names and are accessible only by the current user. Only one temporary file will be created in the temporary directory.
Definition at line 96 of file kernel.hpp.
Referenced by load_dl_from_data().
|
inline |
Extend 'val' to 'bit' bits: zero-extend for unsigned integers and sign-extend for signed integers.
Definition at line 85 of file math.hpp.
Referenced by extend_binary().
|
inline |
SYCL default exception handler.
Definition at line 64 of file device.hpp.