ESIMD APIs to access memory via accessors, USM pointers, perform per-element atomic operations. More...
Modules | |
Atomic memory access. | |
Memory access functions which perform per-lane atomic update using given operation. "Per-lane" means that the atomicity guarantees of a vector atomic operation are the same as of N independent scalar atomic operations per lane (N is number of lanes). | |
Shared local memory access functions. | |
LSC-specific memory access APIs. | |
This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM. | |
Named barrier APIs. | |
HW thread . | |
Namespaces | |
sycl::_V1::ext::intel::esimd::detail | |
Enumerations | |
enum | sycl::_V1::ext::intel::esimd::fence_mask : uint8_t { sycl::_V1::ext::intel::esimd::global_coherent_fence = 0x1, sycl::_V1::ext::intel::esimd::l3_flush_instructions = 0x2, sycl::_V1::ext::intel::esimd::l3_flush_texture_data = 0x4, sycl::_V1::ext::intel::esimd::l3_flush_constant_data = 0x8, sycl::_V1::ext::intel::esimd::l3_flush_rw_data = 0x10, sycl::_V1::ext::intel::esimd::local_barrier = 0x20, sycl::_V1::ext::intel::esimd::l1_flush_ro_data = 0x40, sycl::_V1::ext::intel::esimd::sw_barrier = 0x80 } |
Represetns a bit mask to control behavior of esimd::fence. More... | |
Functions | |
template<typename AccessorTy > | |
__ESIMD_API SurfaceIndex | sycl::_V1::ext::intel::esimd::get_surface_index (AccessorTy acc) |
Get surface index corresponding to a SYCL accessor. More... | |
template<typename Tx , int N, typename Toffset > | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::gather (const Tx *p, simd< Toffset, N > offsets, simd_mask< N > mask=1) |
Loads ("gathers") elements from different memory locations and returns a vector of them. More... | |
template<typename Tx , int N, typename Toffset , typename RegionTy = region1d_t<Toffset, N, 1>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::gather (const Tx *p, simd_view< Toffset, RegionTy > offsets, simd_mask< N > mask=1) |
A variation of gather API with offsets represented as simd_view object. More... | |
template<typename Tx , int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, simd< Tx, N > > | sycl::_V1::ext::intel::esimd::gather (const Tx *p, Toffset offset, simd_mask< N > mask=1) |
A variation of gather API with offsets represented as scalar. More... | |
template<typename Tx , int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::scatter (Tx *p, simd< Toffset, N > offsets, simd< Tx, N > vals, simd_mask< N > mask=1) |
Writes ("scatters") elements of the input vector to different memory locations. More... | |
template<typename Tx , int N, typename Toffset , typename RegionTy = region1d_t<Toffset, N, 1>> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::scatter (Tx *p, simd_view< Toffset, RegionTy > offsets, simd< Tx, N > vals, simd_mask< N > mask=1) |
A variation of scatter API with offsets represented as simd_view object. More... | |
template<typename Tx , int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > | sycl::_V1::ext::intel::esimd::scatter (Tx *p, Toffset offset, simd< Tx, N > vals, simd_mask< N > mask=1) |
A variation of scatter API with offsets represented as scalar. More... | |
template<typename Tx , int N, typename Flags = vector_aligned_tag, class T = detail::__raw_t<Tx>, typename = std::enable_if_t<is_simd_flag_type_v<Flags>>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::block_load (const Tx *addr, Flags={}) |
Loads a contiguous block of memory from given memory address and returns the loaded data as a vector. More... | |
template<typename Tx , int N, typename AccessorTy , typename Flags = vector_aligned_tag, typename = std::enable_if_t<is_simd_flag_type_v<Flags>>, class T = detail::__raw_t<Tx>> | |
__ESIMD_API simd< Tx, N > | sycl::_V1::ext::intel::esimd::block_load (AccessorTy acc, uint32_t offset, Flags={}) |
Loads a contiguous block of memory from given accessor and offset and returns the loaded data as a vector. More... | |
template<typename Tx , int N, class T = detail::__raw_t<Tx>> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::block_store (Tx *p, simd< Tx, N > vals) |
Stores elements of a vector to a contiguous block of memory at given address. More... | |
template<typename Tx , int N, typename AccessorTy , class T = detail::__raw_t<Tx>> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::block_store (AccessorTy acc, uint32_t offset, simd< Tx, N > vals) |
Stores elements of a vector to a contiguous block of memory represented by an accessor and an offset within this accessor. More... | |
template<typename T , int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t<(sizeof(T)<=4) &&(N==1||N==8||N==16||N==32) &&!std::is_pointer< AccessorTy >::value, simd< T, N > > | sycl::_V1::ext::intel::esimd::gather (AccessorTy acc, simd< uint32_t, N > offsets, uint32_t glob_offset=0, simd_mask< N > mask=1) |
template<typename T , int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t<(sizeof(T)<=4) &&(N==1||N==8||N==16||N==32) &&!std::is_pointer< AccessorTy >::value > | sycl::_V1::ext::intel::esimd::scatter (AccessorTy acc, simd< uint32_t, N > offsets, simd< T, N > vals, uint32_t glob_offset=0, simd_mask< N > mask=1) |
template<typename T , typename AccessorTy > | |
__ESIMD_API T | sycl::_V1::ext::intel::esimd::scalar_load (AccessorTy acc, uint32_t offset) |
Load a scalar value from an accessor. More... | |
template<typename T , typename AccessorTy > | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::scalar_store (AccessorTy acc, uint32_t offset, T val) |
Store a scalar value into an accessor. More... | |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset > | |
__ESIMD_API simd< T, N *get_num_channels_enabled(RGBAMask)> | sycl::_V1::ext::intel::esimd::gather_rgba (const T *p, simd< Toffset, N > offsets, simd_mask< N > mask=1) |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset , typename RegionTy = region1d_t<Toffset, N, 1>> | |
__ESIMD_API simd< T, N *get_num_channels_enabled(RGBAMask)> | sycl::_V1::ext::intel::esimd::gather_rgba (const T *p, simd_view< Toffset, RegionTy > offsets, simd_mask< N > mask=1) |
A variation of gather_rgba API with offsets represented as simd_view object. More... | |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, simd< T, N *get_num_channels_enabled(RGBAMask)> > | sycl::_V1::ext::intel::esimd::gather_rgba (const T *p, Toffset offset, simd_mask< N > mask=1) |
A variation of gather_rgba API with offsets represented as scalar. More... | |
template<typename T , int N, rgba_channel_mask RGBAMask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&sizeof(T)==4, simd< T, N *get_num_channels_enabled(RGBAMask)> > | sycl::_V1::ext::intel::esimd::gather_rgba (const T *p, simd< uint32_t, N > offsets, simd_mask< N > mask=1) |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::scatter_rgba (T *p, simd< Toffset, N > offsets, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, simd_mask< N > mask=1) |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset , typename RegionTy = region1d_t<Toffset, N, 1>> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::scatter_rgba (T *p, simd_view< Toffset, RegionTy > offsets, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, simd_mask< N > mask=1) |
A variation of scatter_rgba API with offsets represented as simd_view object. More... | |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename T , int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > | sycl::_V1::ext::intel::esimd::scatter_rgba (T *p, Toffset offset, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, simd_mask< N > mask=1) |
A variation of scatter_rgba API with offsets represented as scalar. More... | |
template<typename T , int N, rgba_channel_mask RGBAMask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&sizeof(T)==4 > | sycl::_V1::ext::intel::esimd::scatter_rgba (T *p, simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, simd_mask< N > mask=1) |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename AccessorT , int N, typename T = typename AccessorT::value_type> | |
__ESIMD_API std::enable_if_t<((N==8||N==16||N==32) &&sizeof(T)==4 &&!std::is_pointer_v< AccessorT >), simd< T, N *get_num_channels_enabled(RGBAMask)> > | sycl::_V1::ext::intel::esimd::gather_rgba (AccessorT acc, simd< uint32_t, N > offsets, uint32_t global_offset=0, simd_mask< N > mask=1) |
Gather and transpose pixels from the given memory locations defined by the base specified by acc , the global offset global_offset and a vector of offsets offsets . More... | |
template<rgba_channel_mask RGBAMask = rgba_channel_mask::ABGR, typename AccessorT , int N, typename T = typename AccessorT::value_type> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&sizeof(T)==4 &&!std::is_pointer_v< AccessorT > > | sycl::_V1::ext::intel::esimd::scatter_rgba (AccessorT acc, simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, uint32_t global_offset=0, simd_mask< N > mask=1) |
Gather data from the memory addressed by accessor acc , offset common for all loaded elements global_offset and per-element offsets offsets , and return it as simd vector. More... | |
template<uint8_t cntl> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::fence () |
esimd::fence sets the memory read/write order. More... | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::fence (fence_mask cntl) |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::barrier () |
Generic work-group barrier. More... | |
template<typename T , int m, int N, typename AccessorTy , unsigned plane = 0> | |
__ESIMD_API simd< T, m *N > | sycl::_V1::ext::intel::esimd::media_block_load (AccessorTy acc, unsigned x, unsigned y) |
Media block load. More... | |
template<typename T , int m, int N, typename AccessorTy , unsigned plane = 0> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::media_block_store (AccessorTy acc, unsigned x, unsigned y, simd< T, m *N > vals) |
Media block store. More... | |
template<split_barrier_action flag> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::split_barrier () |
Generic work-group split barrier. More... | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::split_barrier (split_barrier_action flag) |
ESIMD APIs to access memory via accessors, USM pointers, perform per-element atomic operations.
enum sycl::_V1::ext::intel::esimd::fence_mask : uint8_t |
Represetns a bit mask to control behavior of esimd::fence.
Enum elements define semantics of the bits in the mask.
Definition at line 1225 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::barrier | ( | ) |
Generic work-group barrier.
Performs barrier synchronization for all threads within the same thread group. The barrier instruction causes the executing thread to wait until all threads in the same thread group have executed the barrier instruction. Memory ordering is also guaranteed by this instruction. The behavior is undefined if this instruction is executed in divergent control flow.
Definition at line 1262 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::global_coherent_fence, and sycl::_V1::ext::intel::esimd::local_barrier.
Referenced by sycl::_V1::ext::intel::experimental::esimd::named_barrier_signal().
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
Flags | = {} |
||
) |
Loads a contiguous block of memory from given accessor and offset and returns the loaded data as a vector.
Actual code generated depends on the alignment parameter.
Tx | Element type. |
N | Number of elements to load, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long. |
AccessorTy | Accessor type (auto-deduced). |
Flags | The alignment specifier type tag. Auto-deduced from the Flags parameter. If it is less than 16 , then slower unaligned access is generated, othewise the access is aligned. |
acc | The accessor. |
offset | The offset to load from in bytes. |
Flags | Specifies the alignment. |
Definition at line 329 of file memory.hpp.
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::block_load | ( | const Tx * | addr, |
Flags | = {} |
||
) |
Loads a contiguous block of memory from given memory address and returns the loaded data as a vector.
Actual code generated depends on the alignment parameter.
Tx | Element type. |
N | Number of elements to load, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long. |
Flags | The alignment specifier type tag. Auto-deduced from the Flags parameter. If it is less than 16 , then slower unaligned access is generated, othewise the access is aligned. |
addr | The address to load from. |
Flags | Specifies the alignment. |
Definition at line 290 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::block_store | ( | AccessorTy | acc, |
uint32_t | offset, | ||
simd< Tx, N > | vals | ||
) |
Stores elements of a vector to a contiguous block of memory represented by an accessor and an offset within this accessor.
Tx | Element type. |
N | Number of elements to store, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long. |
AccessorTy | Accessor type (auto-deduced). |
acc | The accessor to store to. |
offset | The offset to store at. It is in bytes and must be a multiple of 16 . |
vals | The vector to store. |
Definition at line 393 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API void sycl::_V1::ext::intel::esimd::block_store | ( | Tx * | p, |
simd< Tx, N > | vals | ||
) |
Stores elements of a vector to a contiguous block of memory at given address.
The address must be at least 16
bytes-aligned.
Tx | Element type. |
N | Number of elements to store, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long. |
p | The memory address to store at. |
vals | The vector to store. |
Definition at line 365 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API void sycl::_V1::ext::intel::esimd::fence | ( | ) |
esimd::fence sets the memory read/write order.
cntl | A bitmask composed from fence_mask bits. |
Definition at line 1249 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::fence | ( | fence_mask | cntl | ) |
Definition at line 1252 of file memory.hpp.
__ESIMD_API std::enable_if_t<(sizeof(T) <= 4) && (N == 1 || N == 8 || N == 16 || N == 32) && !std::is_pointer<AccessorTy>::value, simd<T, N> > sycl::_V1::ext::intel::esimd::gather | ( | AccessorTy | acc, |
simd< uint32_t, N > | offsets, | ||
uint32_t | glob_offset = 0 , |
||
simd_mask< N > | mask = 1 |
||
) |
Collects elements located at given offsets in an accessor and returns them as a single simd object. An element can be 1, 2 or 4-byte value.
T | Element type; can only be a 1,2,4-byte integer, sycl::half or float . |
N | The number of vector elements. Can be 1 , 8 , 16 or 32 . |
AccessorTy | The accessor type. |
acc | The accessor to gather from. |
offsets | Per-element offsets in bytes. |
glob_offset | Offset in bytes added to each individual element's offset to compute actual memory access offset for that element. |
mask | Memory access mask. Elements with zero corresponding mask's predicate are not accessed, their values in the resulting vector are undefined. |
Definition at line 532 of file memory.hpp.
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::gather | ( | const Tx * | p, |
simd< Toffset, N > | offsets, | ||
simd_mask< N > | mask = 1 |
||
) |
Loads ("gathers") elements from different memory locations and returns a vector of them.
Each memory location is base address plus an offset - a value of the corresponding element in the input offset vector. Access to any element's memory location can be disabled via the input vector of predicates (mask).
Tx | Element type, must be of size 4 or less. |
N | Number of elements to read; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offsets | the vector of 32-bit or 64-bit offsets in bytes. For each lane i , ((byte*)p + offsets[i]) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
Definition at line 130 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::gather | ( | const Tx * | p, |
simd_view< Toffset, RegionTy > | offsets, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of gather
API with offsets
represented as simd_view
object.
Tx | Element type, must be of size 4 or less. |
N | Number of elements to read; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offsets | the simd_view of 32-bit or 64-bit offsets in bytes. For each lane i , ((byte*)p + offsets[i]) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
Definition at line 170 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, simd<Tx, N> > sycl::_V1::ext::intel::esimd::gather | ( | const Tx * | p, |
Toffset | offset, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of gather
API with offsets
represented as scalar.
Tx | Element type, must be of size 4 or less. |
N | Number of elements to read; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offset | the scalar 32-bit or 64-bit offset in bytes. ((byte*)p + offset) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
Definition at line 192 of file memory.hpp.
__ESIMD_API std::enable_if_t<((N == 8 || N == 16 || N == 32) && sizeof(T) == 4 && !std::is_pointer_v<AccessorT>), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::gather_rgba | ( | AccessorT | acc, |
simd< uint32_t, N > | offsets, | ||
uint32_t | global_offset = 0 , |
||
simd_mask< N > | mask = 1 |
||
) |
Gather and transpose pixels from the given memory locations defined by the base specified by acc
, the global offset global_offset
and a vector of offsets offsets
.
Up to 4 32-bit data elements may be accessed at each address depending on the channel mask RGBAMask
. Each pixel's address must be 4-byte aligned. For usage examples, see usm_gather_rgba above, the only difference would be the usage of an accessor instead of a usm pointer.
RGBAMask | A pixel's channel mask. |
AccessorT | The accessor type for the memory to be loaded/gathered. The returned vector elements mutch the accessor data type. The loaded elements must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
acc | The accessor representing memory address of the access. |
offsets | Byte offsets of the pixels relative to the base pointer. |
global_offset | Byte offset of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Tx
. Definition at line 842 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API simd<T, N * get_num_channels_enabled(RGBAMask)> sycl::_V1::ext::intel::esimd::gather_rgba | ( | const T * | p, |
simd< Toffset, N > | offsets, | ||
simd_mask< N > | mask = 1 |
||
) |
Gather and transpose pixels from given memory locations defined by the base pointer p
and offsets
. Up to 4 32-bit data elements may be accessed at each address depending on the channel mask Mask
template parameter. Each pixel's address must be 4 byte aligned. As an example, let's assume we want to read n
pixels at address addr
, skipping G
and B
channels. Each channel is a 32-bit float and the pixel data at given address in memory is:
Then this can be achieved by using
Returned x
will contain 2*n
float
elements:
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
Mask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
offsets | vector of byte offsets of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Tx
. Definition at line 637 of file memory.hpp.
__ESIMD_API std::enable_if_t< (N == 8 || N == 16 || N == 32) && sizeof(T) == 4, simd<T, N * get_num_channels_enabled( RGBAMask)> > sycl::_V1::ext::intel::esimd::gather_rgba | ( | const T * | p, |
simd< uint32_t, N > | offsets, | ||
simd_mask< N > | mask = 1 |
||
) |
Definition at line 701 of file memory.hpp.
__ESIMD_API simd<T, N * get_num_channels_enabled(RGBAMask)> sycl::_V1::ext::intel::esimd::gather_rgba | ( | const T * | p, |
simd_view< Toffset, RegionTy > | offsets, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of gather_rgba
API with offsets
represented as simd_view
object.
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
Mask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
offsets | simd_view of byte offsets of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Tx
. Definition at line 667 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::gather_rgba | ( | const T * | p, |
Toffset | offset, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of gather_rgba
API with offsets
represented as scalar.
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
Mask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
offset | scalar byte offsets of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Tx
. Definition at line 692 of file memory.hpp.
__ESIMD_API SurfaceIndex sycl::_V1::ext::intel::esimd::get_surface_index | ( | AccessorTy | acc | ) |
Get surface index corresponding to a SYCL accessor.
acc | a SYCL buffer or image accessor. |
Definition at line 62 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::SLM_BTI.
Referenced by sycl::_V1::ext::intel::esimd::gather_rgba(), sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update(), sycl::_V1::ext::intel::experimental::esimd::lsc_gather(), sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch(), sycl::_V1::ext::intel::experimental::esimd::lsc_scatter(), sycl::_V1::ext::intel::esimd::media_block_load(), sycl::_V1::ext::intel::esimd::media_block_store(), sycl::_V1::ext::intel::esimd::scatter_rgba(), sycl::_V1::ext::intel::esimd::slm_atomic_update(), sycl::_V1::ext::intel::esimd::slm_block_load(), sycl::_V1::ext::intel::esimd::slm_block_store(), sycl::_V1::ext::intel::esimd::slm_gather_rgba(), and sycl::_V1::ext::intel::esimd::slm_scatter_rgba().
__ESIMD_API simd<T, m * N> sycl::_V1::ext::intel::esimd::media_block_load | ( | AccessorTy | acc, |
unsigned | x, | ||
unsigned | y | ||
) |
Media block load.
T | is the element data type. |
m | is the height of the 2D block. |
N | is the width of the 2D block. |
AccessorTy | is type of the SYCL accessor. |
plane | is planar surface index. |
acc | is the SYCL accessor. |
x | is X-coordinate of the left upper rectangle corner in BYTES. |
y | is Y-coordinate of the left upper rectangle corner in ROWS. |
Definition at line 1474 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API void sycl::_V1::ext::intel::esimd::media_block_store | ( | AccessorTy | acc, |
unsigned | x, | ||
unsigned | y, | ||
simd< T, m *N > | vals | ||
) |
Media block store.
T | is the element data type. |
m | is the height of the 2D block. |
N | is the width of the 2D block. |
is | AccessorTy type of the SYCL accessor. |
plane | is planar surface index. |
acc | is the SYCL accessor. |
x | is X-coordinate of the left upper rectangle corner in BYTES. |
y | is Y-coordinate of the left upper rectangle corner in ROWS. |
vals | is the linearized 2D block data to be written to surface. |
Definition at line 1515 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API T sycl::_V1::ext::intel::esimd::scalar_load | ( | AccessorTy | acc, |
uint32_t | offset | ||
) |
Load a scalar value from an accessor.
T | Type of the value. |
AccessorTy | Type of the accessor. |
acc | Accessor to load from. |
offset | Offset in bytes. |
Definition at line 583 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::scalar_store | ( | AccessorTy | acc, |
uint32_t | offset, | ||
T | val | ||
) |
Store a scalar value into an accessor.
T | Type of the value. |
AccessorTy | Type of the accessor. |
acc | Accessor to store to. |
offset | Offset in bytes. |
val | The stored value. |
Definition at line 597 of file memory.hpp.
__ESIMD_API std::enable_if_t<(sizeof(T) <= 4) && (N == 1 || N == 8 || N == 16 || N == 32) && !std::is_pointer<AccessorTy>::value> sycl::_V1::ext::intel::esimd::scatter | ( | AccessorTy | acc, |
simd< uint32_t, N > | offsets, | ||
simd< T, N > | vals, | ||
uint32_t | glob_offset = 0 , |
||
simd_mask< N > | mask = 1 |
||
) |
Writes elements of a simd object into an accessor at given offsets. An element can be 1, 2 or 4-byte value.
T | Element type; can only be a 1,2,4-byte integer, sycl::half or float . |
N | The number of vector elements. Can be 1 , 8 , 16 or 32 . |
AccessorTy | The accessor type. |
acc | The accessor to scatter to. |
offsets | Per-element offsets in bytes. |
vals | Values to write. |
glob_offset | Offset in bytes added to each individual element's offset to compute actual memory access offset for that element. |
mask | Memory access mask. Elements with zero corresponding mask's predicate are not accessed. |
Definition at line 565 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::scatter | ( | Tx * | p, |
simd< Toffset, N > | offsets, | ||
simd< Tx, N > | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Writes ("scatters") elements of the input vector to different memory locations.
Each memory location is base address plus an offset - a value of the corresponding element in the input offset vector. Access to any element's memory location can be disabled via the input mask.
Tx | Element type, must be of size 4 or less. |
N | Number of elements to write; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offsets | A vector of 32-bit or 64-bit offsets in bytes. For each lane i , ((byte*)p + offsets[i]) must be element size aligned. |
vals | The vector to scatter. |
mask | The access mask, defaults to all 1s. |
Definition at line 210 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::isPowerOf2().
__ESIMD_API void sycl::_V1::ext::intel::esimd::scatter | ( | Tx * | p, |
simd_view< Toffset, RegionTy > | offsets, | ||
simd< Tx, N > | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of scatter
API with offsets
represented as simd_view
object.
Tx | Element type, must be of size 4 or less. |
N | Number of elements to write; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offsets | A simd_view of 32-bit or 64-bit offsets in bytes. For each lane i , ((byte*)p + offsets[i]) must be element size aligned. |
vals | The vector to scatter. |
mask | The access mask, defaults to all 1s. |
Definition at line 250 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::esimd::scatter | ( | Tx * | p, |
Toffset | offset, | ||
simd< Tx, N > | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of scatter
API with offsets
represented as scalar.
Tx | Element type, must be of size 4 or less. |
N | Number of elements to write; can be 1 , 2 , 4 , 8 , 16 or 32 . |
p | The base address. |
offset | the scalar 32-bit or 64-bit offset in bytes. ((byte*)p + offset) must be element size aligned. |
vals | The vector to scatter. |
mask | The access mask, defaults to all 1s. |
Definition at line 270 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && sizeof(T) == 4 && !std::is_pointer_v<AccessorT> > sycl::_V1::ext::intel::esimd::scatter_rgba | ( | AccessorT | acc, |
simd< uint32_t, N > | offsets, | ||
simd< T, N *get_num_channels_enabled(RGBAMask)> | vals, | ||
uint32_t | global_offset = 0 , |
||
simd_mask< N > | mask = 1 |
||
) |
Gather data from the memory addressed by accessor acc
, offset common for all loaded elements global_offset
and per-element offsets offsets
, and return it as simd vector.
See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.
RGBAMask | Pixel's channel mask. |
AccessorT | The accessor type for the memory to be stored/scattered. The returned vector elements mast match the accessor data type. The loaded elements must be 4 bytes in size. |
N | The number of elements to access. |
offsets | Byte offsets of each element. |
vals | values to be written. |
global_offset | Byte offset of the pixels relative to the base pointer. |
mask | Operation mask. All-1 by default. |
Definition at line 876 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API void sycl::_V1::ext::intel::esimd::scatter_rgba | ( | T * | p, |
simd< Toffset, N > | offsets, | ||
simd< T, N *get_num_channels_enabled(RGBAMask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Transpose and scatter pixels to given memory locations defined by the base pointer p
and offsets
. Up to 4 32-bit data elements may be accessed at each address depending on the channel mask RGBAMask
. Each pixel's address must be 4 byte aligned. This is basically an inverse operation for gather_rgba. Unlike gather_rgba
, this function imposes restrictions on possible Mask
template argument values. It can only be one of the following: ABGR
, BGR
, GR
, R
.
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
RGBAMask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
vals | values to be written. |
offsets | vector of byte offsets of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Definition at line 740 of file memory.hpp.
__ESIMD_API std:: enable_if_t<(N == 8 || N == 16 || N == 32) && sizeof(T) == 4> sycl::_V1::ext::intel::esimd::scatter_rgba | ( | T * | p, |
simd< uint32_t, N > | offsets, | ||
simd< T, N *get_num_channels_enabled(RGBAMask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Definition at line 807 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::scatter_rgba | ( | T * | p, |
simd_view< Toffset, RegionTy > | offsets, | ||
simd< T, N *get_num_channels_enabled(RGBAMask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of scatter_rgba
API with offsets
represented as simd_view
object.
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
RGBAMask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
vals | values to be written. |
offsets | simd_view of byte offsets of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Definition at line 773 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::esimd::scatter_rgba | ( | T * | p, |
Toffset | offset, | ||
simd< T, N *get_num_channels_enabled(RGBAMask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
A variation of scatter_rgba
API with offsets
represented as scalar.
T | Element type of the returned vector. Must be 4 bytes in size. |
N | Number of pixels to access (matches the size of the offsets vector). Must be 8, 16 or 32. |
RGBAMask | A pixel's channel mask. |
p | The USM base pointer representing memory address of the access. |
vals | values to be written. |
offset | scalar byte offset of the pixels relative to the base pointer. |
mask | Memory access mask. Pixels with zero corresponding mask's predicate are not accessed. Their values in the resulting vector are undefined. |
Definition at line 798 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::split_barrier | ( | ) |
Generic work-group split barrier.
flag | - split barrier action. |
Definition at line 28 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::split_barrier | ( | split_barrier_action | flag | ) |
Definition at line 33 of file memory.hpp.