This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM. More...
Namespaces | |
sycl::_V1::ext::intel::experimental::esimd::detail | |
Classes | |
class | sycl::_V1::ext::intel::experimental::esimd::config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > |
Container class to hold parameters for load2d/store2d functions More... | |
Functions | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
SLM gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values) |
SLM gather. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1) |
Transposed SLM gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values) |
Transposed SLM gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values) |
USM pointer gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< !std::is_pointer_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< !std::is_pointer_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values) |
Accessor-based gather. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
USM pointer transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, FlagsT flags) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values, FlagsT flags=FlagsT{}) |
USM pointer transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
Accessor-based transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t<!std::is_pointer< AccessorTy >::value &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, FlagsT flags) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values, FlagsT flags=FlagsT{}) |
Accessor-based transposed gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer prefetch gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p) |
USM pointer prefetch transposed gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based prefetch gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, uint32_t offset) |
Accessor-based transposed prefetch gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
SLM scatter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store (uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals) |
Transposed SLM scatter with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer scatter. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based scatter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
USM pointer transposed scatter with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
Accessor-based transposed scatter with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{}) |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y) |
2D USM pointer block load. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y) |
2D USM pointer block prefetch. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals) |
2D USM pointer block store. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload) |
A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload) |
A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION void | sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload, sycl::ext::intel::esimd::simd< T, N > Data) |
A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1 &&((Op !=sycl::ext::intel::esimd::atomic_op::store &&Op !=sycl::ext::intel::esimd::atomic_op::xchg)||N==1), sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
template<lsc_memory_kind Kind = lsc_memory_kind::untyped_global, lsc_fence_op FenceOp = lsc_fence_op::none, lsc_scope Scope = lsc_scope::group, int N = 16> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_fence (sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Memory fence. More... | |
This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM.
|
strong |
LSC atomic operation codes.
atomic_update<native::lsc::atomic_op::inc>(...);
is a short-cut to lsc_atomic_update<atomic_op::inc>(...);
with default cache and data size controls.
Enumerator | |
---|---|
inc | |
dec | |
load | |
store | |
add | |
sub | |
smin | |
smax | |
umin | |
umax | |
cmpxchg | |
fadd | |
fsub | |
fmin | |
fmax | |
fcmpxchg | |
bit_and | |
bit_or | |
bit_xor |
Definition at line 39 of file common.hpp.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 3575 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 3486 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 3400 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 3633 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 3542 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 3454 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | predicates. |
Definition at line 3315 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 3233 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 3158 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3353 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3269 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3193 of file memory.hpp.
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3368 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1 && ((Op != sycl::ext::intel::esimd::atomic_op::store && Op != sycl::ext::intel::esimd::atomic_op::xchg) || N == 1), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3287 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 3206 of file memory.hpp.
__ESIMD_API std::enable_if_t<!std::is_pointer<AccessorTy>::value && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
FlagsT | flags | ||
) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.
Collects elements located at surface and returns them as a single simd object. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
flags | is the alignment specifier type tag. |
pred
is 0 are undefined. Definition at line 1448 of file memory.hpp.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | old_values, | ||
FlagsT | flags = FlagsT{} |
||
) |
Accessor-based transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
pred | is operation predicate. Operation is skipped for index 'i' if pred[i] == 0 and the result element is taken from old_values [i]. Otherwise, the operation is performed. |
old_values | contains the values copied to the result when the corresponding element from pred is zero. |
flags | is the alignment specifier type tag. |
Definition at line 1505 of file memory.hpp.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
Accessor-based transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
flags | is the alignment specifier type tag. |
pred
is 0 are undefined. Definition at line 1325 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
FlagsT | flags | ||
) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
flags | is the alignment specifier type tag. |
pred
is 0 are undefined. Definition at line 1171 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | old_values, | ||
FlagsT | flags = FlagsT{} |
||
) |
USM pointer transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. |
old_values | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
flags | is the alignment specifier type tag. |
Definition at line 1217 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
USM pointer transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
flags | is the alignment specifier type tag. |
pred
is 0 are undefined. Definition at line 1071 of file memory.hpp.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags = FlagsT{} |
||
) |
Definition at line 2288 of file memory.hpp.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
Accessor-based transposed scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to surface. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
vals | is values to store. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
flags | is the alignment specifier type tag. |
Definition at line 2208 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | T * | p, |
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags | ||
) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.
Scatters elements to specific address. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
vals | is values to store. |
flags | is the alignment specifier type tag. |
Definition at line 2161 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | T * | p, |
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
USM pointer transposed scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to specific address. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
vals | is values to store. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
flags | is the alignment specifier type tag. |
Definition at line 2069 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_fence | ( | sycl::ext::intel::esimd::simd_mask< N > | pred = 1 | ) |
Memory fence.
Supported platforms: DG2, PVC
Kind | is the Sfid shaded function. |
FenceOp | is the fence operation. |
Scope | is the operation scope. |
N | is the number of channels (platform dependent). |
pred | is predicates. |
Definition at line 3651 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::group, sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::shared_local.
__ESIMD_API std::enable_if_t< !std::is_pointer_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | old_values | ||
) |
Accessor-based gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
old_values | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
Definition at line 962 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API std::enable_if_t< !std::is_pointer_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 873 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | old_values | ||
) |
USM pointer gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
old_values | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
Definition at line 776 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 729 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | old_values | ||
) |
Definition at line 816 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 806 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | old_values | ||
) |
Definition at line 840 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 829 of file memory.hpp.
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload | ) |
A variation of 2D
stateless block load with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Definition at line 2856 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::load.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d | ( | const T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y | ||
) |
2D USM pointer block load.
Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
NBlocks | is the number of blocks. |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Definition at line 2442 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::load, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::transpose.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based prefetch gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at surface.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 1712 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | AccessorTy | acc, |
uint32_t | offset | ||
) |
Accessor-based transposed prefetch gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at surface.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
Definition at line 1778 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p | ) |
USM pointer prefetch transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at specified address.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
p | is the base pointer. |
Definition at line 1665 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer prefetch gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at specified address.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 1608 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1633 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1644 of file memory.hpp.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload | ) |
A variation of 2D
stateless block prefetch with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Definition at line 2941 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::prefetch.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d | ( | const T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y | ||
) |
2D USM pointer block prefetch.
Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm
Prefetches elements located at specified address.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
NBlocks | is the number of blocks. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Definition at line 2549 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::prefetch.
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to surface.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1971 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to specific address.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1900 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1928 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1940 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 3108 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 3061 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 3021 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | old_values | ||
) |
Transposed SLM gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
offset | is the zero-based offset for SLM buffer in bytes. |
pred | is the predicate; if it contains 0, then the actual load is not performed and old_values is returned. |
old_values | contains the vector that is returned if the parameter pred contains 0. |
Definition at line 687 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 |
||
) |
Transposed SLM gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
offset | is the zero-based offset for SLM buffer in bytes. |
pred | is the predicate; if it contains 0, then the actual load is not performed and the returned value is undefined. |
Definition at line 650 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd< T, NElts > | vals | ||
) |
Transposed SLM scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.slm
Scatters elements located to slm.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
offset | is the zero-based offset for SLM buffer in bytes. |
vals | is values to store. |
Definition at line 1859 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | old_values | ||
) |
SLM gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
pred | is predicates. |
old_values | values copied to the result when the corresponding element of pred is zero.. |
Definition at line 610 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
SLM gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
pred | is predicates. |
Definition at line 572 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
SLM scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.slm
Scatters elements located to slm.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1826 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload, |
sycl::ext::intel::esimd::simd< T, N > | Data | ||
) |
A variation of 2D
stateless block store with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Data | is the data to be stored. |
Definition at line 2983 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::store.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d | ( | T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y, | ||
sycl::ext::intel::esimd::simd< T, N > | Vals | ||
) |
2D USM pointer block store.
Supported platforms: PVC VISA instruction: lsc_store_block2d.ugm
Stores elements at specified address.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
L1H | is L1 cache hint. |
L3H | is L3 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Vals | is a vector to store of type T and size N, where N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks |
Definition at line 2595 of file memory.hpp.
References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::store.