DPC++ Runtime
Runtime libraries for oneAPI DPC++
Shared local memory access functions.
Collaboration diagram for Shared local memory access functions.:

Functions

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ()
 Declare per-work-group slm size. More...
 
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init (uint32_t size)
 Declare per-work-group slm size. More...
 
template<typename T , int N>
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32), simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather operation over the Shared Local Memory. More...
 
template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset)
 Load a scalar value from the Shared Local Memory. More...
 
template<typename T , int N>
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32) &&(sizeof(T)<=4)> sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N > offsets, simd< T, N > vals, simd_mask< N > mask=1)
 Scatter operation over the Shared Local Memory. More...
 
template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val)
 Store a scalar value into the Shared Local Memory. More...
 
template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N>
__ESIMD_API simd< T, N > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset)
 Loads a contiguous block of memory from the SLM at given offset and returns the loaded data as a vector. More...
 
template<typename T , int N>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals)
 Stores elements of a vector to a contiguous block of SLM at given offset. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd< Tx, N > src1, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 

Detailed Description

Function Documentation

◆ slm_atomic_update() [1/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd< Tx, N >  src0,
simd< Tx, N >  src1,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

Two source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1915 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_atomic_update() [2/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd< Tx, N >  src0,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

One source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1903 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_atomic_update() [3/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

No source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1892 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_block_load()

template<typename T , int N>
__ESIMD_API simd<T, N> sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  offset)

Loads a contiguous block of memory from the SLM at given offset and returns the loaded data as a vector.

Template Parameters
TElement type.
NNumber of elements to load, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long.
Parameters
offsetThe offset to load from in bytes. Must be oword-aligned.
Returns
A vector of loaded elements.

Definition at line 1849 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::esimd::detail::isPowerOf2().

◆ slm_block_store()

template<typename T , int N>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  offset,
simd< T, N >  vals 
)

Stores elements of a vector to a contiguous block of SLM at given offset.

Template Parameters
TElement type.
NNumber of elements to store, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long.
Parameters
offsetThe offset in bytes to store at. Must be oword-aligned.
valsThe vector to store.

Definition at line 1873 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::esimd::detail::isPowerOf2().

◆ slm_gather()

template<typename T , int N>
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32), simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather operation over the Shared Local Memory.

This API has almost the same interface as the accessor-based gather, except that it does not have the accessor and the global offset parameters.

Definition at line 1760 of file memory.hpp.

◆ slm_gather_rgba()

template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
RGBAMaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
maskOperation mask. All-1 by default.
Returns
Gathered data as an N - element vector.

Definition at line 1811 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_init() [1/2]

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( )

Declare per-work-group slm size.

GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.

Template Parameters
SLMSizeShared Local Memory (SLM) size

Definition at line 1738 of file memory.hpp.

◆ slm_init() [2/2]

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( uint32_t  size)

Declare per-work-group slm size.

Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together

Parameters
sizeShared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel.

Definition at line 1750 of file memory.hpp.

◆ slm_scalar_load()

template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load ( uint32_t  offset)

Load a scalar value from the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
Returns
the loaded value

Definition at line 1770 of file memory.hpp.

◆ slm_scalar_store()

template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store ( uint32_t  offset,
T  val 
)

Store a scalar value into the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
valvalue to store

Definition at line 1794 of file memory.hpp.

◆ slm_scatter()

template<typename T , int N>
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32) && (sizeof(T) <= 4)> sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N >  offsets,
simd< T, N >  vals,
simd_mask< N >  mask = 1 
)

Scatter operation over the Shared Local Memory.

This API has almost the same interface as the accessor-based scatter, except that it does not have the accessor and the global offset parameters.

Definition at line 1783 of file memory.hpp.

◆ slm_scatter_rgba()

template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba ( simd< uint32_t, N >  offsets,
simd< T, N *get_num_channels_enabled(Mask)>  vals,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
MaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
valsvalues to be written.
maskOperation mask. All-1 by default.

Definition at line 1829 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().