Functions | |
template<uint32_t SLMSize> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_init () |
Declare per-work-group slm size. More... | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_init (uint32_t size) |
Declare per-work-group slm size. More... | |
template<typename T , int N> | |
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32), simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > offsets, simd_mask< N > mask=1) |
Gather operation over the Shared Local Memory. More... | |
template<typename T > | |
__ESIMD_API T | sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset) |
Load a scalar value from the Shared Local Memory. More... | |
template<typename T , int N> | |
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32) &&(sizeof(T)<=4)> | sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N > offsets, simd< T, N > vals, simd_mask< N > mask=1) |
Scatter operation over the Shared Local Memory. More... | |
template<typename T > | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val) |
Store a scalar value into the Shared Local Memory. More... | |
template<typename T , int N, rgba_channel_mask RGBAMask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > | sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1) |
Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More... | |
template<typename T , int N, rgba_channel_mask Mask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> | sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1) |
Scatter data to the Shared Local Memory at specified offsets. More... | |
template<typename T , int N> | |
__ESIMD_API simd< T, N > | sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset) |
Loads a contiguous block of memory from the SLM at given offset and returns the loaded data as a vector. More... | |
template<typename T , int N> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals) |
Stores elements of a vector to a contiguous block of SLM at given offset. More... | |
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd_mask< N > mask) |
Atomic update operation performed on SLM. More... | |
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd_mask< N > mask) |
Atomic update operation performed on SLM. More... | |
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd< Tx, N > src1, simd_mask< N > mask) |
Atomic update operation performed on SLM. More... | |
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | offsets, |
simd< Tx, N > | src0, | ||
simd< Tx, N > | src1, | ||
simd_mask< N > | mask | ||
) |
Atomic update operation performed on SLM.
Two source operands version. See description of template and function parameters in atomic update operation docs.
Definition at line 1915 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | offsets, |
simd< Tx, N > | src0, | ||
simd_mask< N > | mask | ||
) |
Atomic update operation performed on SLM.
One source operand version. See description of template and function parameters in atomic update operation docs.
Definition at line 1903 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | offsets, |
simd_mask< N > | mask | ||
) |
Atomic update operation performed on SLM.
No source operands version. See description of template and function parameters in atomic update operation docs.
Definition at line 1892 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API simd<T, N> sycl::_V1::ext::intel::esimd::slm_block_load | ( | uint32_t | offset | ) |
Loads a contiguous block of memory from the SLM at given offset and returns the loaded data as a vector.
T | Element type. |
N | Number of elements to load, N * sizeof(T) must be 1, 2, 4 or 8 owords long. |
offset | The offset to load from in bytes. Must be oword-aligned. |
Definition at line 1849 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_block_store | ( | uint32_t | offset, |
simd< T, N > | vals | ||
) |
Stores elements of a vector to a contiguous block of SLM at given offset.
T | Element type. |
N | Number of elements to store, N * sizeof(T) must be 1, 2, 4 or 8 owords long. |
offset | The offset in bytes to store at. Must be oword-aligned. |
vals | The vector to store. |
Definition at line 1873 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32), simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N > | offsets, |
simd_mask< N > | mask = 1 |
||
) |
Gather operation over the Shared Local Memory.
This API has almost the same interface as the accessor-based gather, except that it does not have the accessor and the global offset parameters.
Definition at line 1760 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba | ( | simd< uint32_t, N > | offsets, |
simd_mask< N > | mask = 1 |
||
) |
Gather data from the Shared Local Memory at specified offsets and return it as a simd vector.
See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.
T | The element type of the returned vector. |
N | The number of elements to access. |
RGBAMask | Pixel's channel mask. |
offsets | Byte offsets within the SLM of each element. |
mask | Operation mask. All-1 by default. |
Returns an N-element vector.
Definition at line 1811 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init | ( | ) |
Declare per-work-group slm size.
GPU RT/driver requires this function to be called at the beginning of any kernel that uses SLM. There must be only one call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with the slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates an extra chunk of SLM memory, and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at its declaration and in the ESIMD extension specification.
SLMSize | Shared Local Memory (SLM) size |
Definition at line 1738 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init | ( | uint32_t | size | ) |
Declare per-work-group slm size.
Non-constant argument version to be used with specialization constants only. The same restrictions apply to this function as to its template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together with the slm_allocator class.
size | Shared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel. |
Definition at line 1750 of file memory.hpp.
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load | ( | uint32_t | offset | ) |
Load a scalar value from the Shared Local Memory.
T | type of the value |
offset | SLM offset in bytes |
Definition at line 1770 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store | ( | uint32_t | offset, |
T | val | ||
) |
Store a scalar value into the Shared Local Memory.
T | type of the value |
offset | SLM offset in bytes |
val | value to store |
Definition at line 1794 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32) && (sizeof(T) <= 4)> sycl::_V1::ext::intel::esimd::slm_scatter | ( | simd< uint32_t, N > | offsets, |
simd< T, N > | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Scatter operation over the Shared Local Memory.
This API has almost the same interface as the accessor-based scatter, except that it does not have the accessor and the global offset parameters.
Definition at line 1783 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba | ( | simd< uint32_t, N > | offsets, |
simd< T, N *get_num_channels_enabled(Mask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Scatter data to the Shared Local Memory at specified offsets.
See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.
T | The element type of the values to be written. |
N | The number of elements to access. |
Mask | Pixel's channel mask. |
offsets | Byte offsets within the SLM of each element. |
vals | values to be written. |
mask | Operation mask. All-1 by default. |
Definition at line 1829 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().