DPC++ Runtime
Runtime libraries for oneAPI DPC++
Shared local memory access functions.
Collaboration diagram for Shared local memory access functions.:

Functions

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ()
 Declare per-work-group slm size. More...
 
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init (uint32_t size)
 Declare per-work-group slm size. More...
 
template<typename T , int N>
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32), simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather operation over the Shared Local Memory. More...
 
template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset)
 Load a scalar value from the Shared Local Memory. More...
 
template<typename T , int N>
__ESIMD_API std::enable_if_t<(N==1||N==8||N==16||N==32) &&(sizeof(T)<=4)> sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N > offsets, simd< T, N > vals, simd_mask< N > mask=1)
 Scatter operation over the Shared Local Memory. More...
 
template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val)
 Store a scalar value into the Shared Local Memory. More...
 
template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, Flags={})
 Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object. More...
 
template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals, Flags={})
 Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 
template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > offsets, simd< Tx, N > src0, simd< Tx, N > src1, simd_mask< N > mask)
 Atomic update operation performed on SLM. More...
 

Detailed Description

Function Documentation

◆ slm_atomic_update() [1/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd< Tx, N >  src0,
simd< Tx, N >  src1,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

Two source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1228 of file memory.hpp.

References simd_mask< _Tp, _Abi >::data(), sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.

◆ slm_atomic_update() [2/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd< Tx, N >  src0,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

One source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1216 of file memory.hpp.

References simd_mask< _Tp, _Abi >::data(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::src0.

◆ slm_atomic_update() [3/3]

template<atomic_op Op, typename Tx , int N, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask 
)

Atomic update operation performed on SLM.

No source operands version. See description of template and function parameters in atomic update operation docs.

Definition at line 1205 of file memory.hpp.

References simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_block_load()

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  offset,
Flags  = {} 
)

Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.

Template Parameters
TElement type.
NNumber of elements to load.
FlagsThe alignment specifier type tag.
Parameters
offsetThe byte-offset to load from.
FlagsSpecifies the alignment.
Returns
A vector of loaded elements.

Definition at line 1173 of file memory.hpp.

◆ slm_block_store()

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  offset,
simd< T, N >  vals,
Flags  = {} 
)

Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient scatter is generated. If the stored vector is too long for 1 flat-store GPU instruction, then a series of flat-store and/or scatters may be generated.

Template Parameters
TElement type.
NNumber of elements to store.
FlagsThe alignment specifier type tag.
Parameters
offsetThe byte-offset to store at.
valsThe vector to store.
FlagsSpecifies the alignment.

Definition at line 1196 of file memory.hpp.

◆ slm_gather()

template<typename T , int N>
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32), simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather operation over the Shared Local Memory.

This API has almost the same interface as the accessor-based gather, except that it does not have the accessor and the global offset parameters.

Definition at line 1075 of file memory.hpp.

◆ slm_gather_rgba()

template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
RGBAMaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
maskOperation mask. All-1 by default.
Returns
Gathered data as an N - element vector.

Definition at line 1126 of file memory.hpp.

References simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_init() [1/2]

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( )

Declare per-work-group slm size.

GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.

Template Parameters
SLMSizeShared Local Memory (SLM) size

Definition at line 1053 of file memory.hpp.

◆ slm_init() [2/2]

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( uint32_t  size)

Declare per-work-group slm size.

Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together

Parameters
sizeShared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel.

Definition at line 1065 of file memory.hpp.

◆ slm_scalar_load()

template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load ( uint32_t  offset)

Load a scalar value from the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
Returns
the loaded value

Definition at line 1085 of file memory.hpp.

◆ slm_scalar_store()

template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store ( uint32_t  offset,
val 
)

Store a scalar value into the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
valvalue to store

Definition at line 1109 of file memory.hpp.

◆ slm_scatter()

template<typename T , int N>
__ESIMD_API std::enable_if_t<(N == 1 || N == 8 || N == 16 || N == 32) && (sizeof(T) <= 4)> sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N >  offsets,
simd< T, N >  vals,
simd_mask< N >  mask = 1 
)

Scatter operation over the Shared Local Memory.

This API has almost the same interface as the accessor-based scatter, except that it does not have the accessor and the global offset parameters.

Definition at line 1098 of file memory.hpp.

◆ slm_scatter_rgba()

template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba ( simd< uint32_t, N >  offsets,
simd< T, N *get_num_channels_enabled(Mask)>  vals,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
MaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
valsvalues to be written.
maskOperation mask. All-1 by default.

Definition at line 1144 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index().