This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM. More...
Classes | |
class | sycl::_V1::ext::intel::experimental::esimd::config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > |
Container class to hold parameters for load2d/store2d functions More... | |
Functions | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
SLM gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
SLM gather. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
Transposed SLM gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru) |
Transposed SLM gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
USM pointer gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
Accessor-based gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru) |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={}) |
USM pointer transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, FlagsT) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT={}) |
USM pointer transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
Accessor-based transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{}) |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, FlagsT flags) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, FlagsT flags) |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT={}) |
Accessor-based transposed gather with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT flags=FlagsT{}) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer prefetch gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, FlagsT={}) |
USM pointer prefetch transposed gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based prefetch gather. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, FlagsT flags=FlagsT{}) |
Accessor-based transposed prefetch gather with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
SLM scatter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store (uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{}) |
Transposed SLM scatter with 1 channel. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
USM pointer scatter. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > > | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
Accessor-based scatter. More... | |
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > > | sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={}) |
USM pointer transposed scatter with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={}) |
Accessor-based transposed scatter with 1 channel. More... | |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{}) |
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > | sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y) |
2D USM pointer block load. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y) |
2D USM pointer block prefetch. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()> | |
__ESIMD_API void | sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals) |
2D USM pointer block store. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload) |
A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION void | sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload) |
A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()> | |
ESIMD_INLINE SYCL_ESIMD_FUNCTION void | sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload, sycl::ext::intel::esimd::simd< T, N > Data) |
A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size> | |
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > | sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
SLM atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==1 &&((Op !=sycl::ext::intel::esimd::atomic_op::store &&Op !=sycl::ext::intel::esimd::atomic_op::xchg)||N==1), sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
USM pointer atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset > | |
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1) |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&(Op==sycl::ext::intel::esimd::atomic_op::load||__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write >), sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_device_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_device_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
Accessor-based atomic. More... | |
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > | sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred) |
Variant of lsc_atomic_update that uses local_accessor as a parameter. More... | |
This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM.
|
strong |
LSC atomic operation codes.
atomic_update<native::lsc::atomic_op::inc>(...);
is a short-cut to lsc_atomic_update<atomic_op::inc>(...);
with default cache and data size controls.
Enumerator | |
---|---|
inc | |
dec | |
load | |
store | |
add | |
sub | |
smin | |
smax | |
umin | |
umax | |
cmpxchg | |
fadd | |
fsub | |
fmin | |
fmax | |
fcmpxchg | |
bit_and | |
bit_or | |
bit_xor |
Definition at line 39 of file common.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 2442 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 2382 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && (Op == sycl::ext::intel::esimd::atomic_op::load || __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Accessor-based atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 2324 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 2473 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 2411 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
Variant of lsc_atomic_update
that uses local_accessor
as a parameter.
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 2352 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | predicates. |
Definition at line 2261 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 2203 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
USM pointer atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
p | is the base pointer. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 2163 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 2275 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 2216 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 2289 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 1 && ((Op != sycl::ext::intel::esimd::atomic_op::store && Op != sycl::ext::intel::esimd::atomic_op::xchg) || N == 1), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 2233 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 2177 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
FlagsT | flags | ||
) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.
Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
flags | is the alignment specifier type tag. |
pred
is 0 are undefined. Definition at line 926 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | pass_thru, | ||
FlagsT | = {} |
||
) |
Accessor-based transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
FlagsT | is the alignment specifier type tag. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
pred | is operation predicate. Operation is skipped for index 'i' if pred[i] == 0 and the result element is taken from pass_thru [i]. Otherwise, the operation is performed. |
pass_thru | contains the values copied to the result when the corresponding element from pred is zero. |
Definition at line 987 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
Accessor-based transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
FlagsT | is the alignment specifier type tag. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
pred
is 0 are undefined. Definition at line 867 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
FlagsT | flags | ||
) |
Definition at line 941 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | pass_thru, | ||
FlagsT | flags = FlagsT{} |
||
) |
Definition at line 1005 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
Definition at line 884 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
FlagsT | |||
) |
A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
FlagsT | is the alignment specifier type tag. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
p | is the base pointer. |
pred
is 0 are undefined. Definition at line 775 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | pass_thru, | ||
FlagsT | = {} |
||
) |
USM pointer transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
FlagsT | is the alignment specifier type tag. |
p | is the base pointer. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. |
pass_thru | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
Definition at line 819 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load | ( | const T * | p, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | = {} |
||
) |
USM pointer transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Accesses contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
Flags | is the alignment specifier type tag. |
p | is the base pointer. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
pred
is 0 are undefined. Definition at line 734 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags | ||
) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.
Scatters elements to surface. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
vals | is values to store. |
flags | is the alignment specifier type tag. |
Definition at line 1542 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | = {} |
||
) |
Accessor-based transposed scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to surface. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
Flags | is the alignment specifier type tag. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
vals | is values to store. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
Definition at line 1481 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | AccessorTy | acc, |
uint32_t | offset, | ||
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags = FlagsT{} |
||
) |
Definition at line 1498 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | T * | p, |
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags | ||
) |
A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.
Scatters elements to specific address. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
p | is the base pointer. |
vals | is values to store. |
flags | is the alignment specifier type tag. |
Definition at line 1434 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store | ( | T * | p, |
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | = {} |
||
) |
USM pointer transposed scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to specific address. When DS
equals lsc_data_size::u64
or sizeof(T)
equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32
, lsc_data_size::u64
, lsc_data_size::u8
, lsc_data_size::u16
. When data size is either lsc_data_size::u8
or lsc_data_size::u16
the data is treated as 32 bit data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts
equal to 128, 16 bit data and NElts
equal to 256, 8 bit data and NElts
equal to 512. Otherwise 4 bytes alignment is required.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size (unused/obsolete). |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
Flags | is the alignment specifier type tag. |
p | is the base pointer. |
vals | is values to store. |
pred | is operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation. |
Definition at line 1395 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
Accessor-based gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
pass_thru | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
Definition at line 647 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at surface and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 573 of file memory.hpp.
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
Definition at line 687 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 612 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
USM pointer gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
pass_thru | contains the vector which elements are copied to the returned result when the corresponding element of pred is 0. |
Definition at line 493 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 460 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
Definition at line 516 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 506 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
Definition at line 540 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 529 of file memory.hpp.
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload | ) |
A variation of 2D
stateless block load with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Definition at line 1892 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::check_cache_hints().
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d | ( | const T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y | ||
) |
2D USM pointer block load.
Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm
Collects elements located at specified address and returns them as a single simd object.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
NBlocks | is the number of blocks. |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Definition at line 1584 of file memory.hpp.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | AccessorTy | acc, |
__ESIMD_DNS::DeviceAccessorOffsetT | offset, | ||
FlagsT | flags = FlagsT{} |
||
) |
Accessor-based transposed prefetch gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at surface of NElts * S
bytes starting from given offset, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
FlagsT | is the alignment specifier type tag. |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offset | is the zero-based offset in bytes. |
Definition at line 1181 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based prefetch gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at surface.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 1117 of file memory.hpp.
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
FlagsT | = {} |
||
) |
USM pointer prefetch transposed gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at contiguous block of memory of NElts * S
bytes starting from given address, where S is a byte size of an "element" defined by the DS
template parameter. The maximum size of prefetched block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts
values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts
values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts
values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts
values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
FlagsT | is the alignment specifier type tag. |
p | is the base pointer. |
Definition at line 1087 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer prefetch gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm
Prefetches elements located at specified address.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
pred | is predicates. |
Definition at line 1031 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1042 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch | ( | const T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1052 of file memory.hpp.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload | ) |
A variation of 2D
stateless block prefetch with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
Transposed | is the transposed version or not. |
Transformed | is apply VNNI transform or not. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Definition at line 1988 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::check_cache_hints(), and sycl::_V1::ext::intel::esimd::prefetch().
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d | ( | const T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y | ||
) |
2D USM pointer block prefetch.
Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm
Prefetches elements located at specified address.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
NBlocks | is the number of blocks. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Definition at line 1618 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Accessor-based scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to surface.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
AccessorTy | is the sycl::accessor type. |
acc | is the SYCL accessor. |
offsets | is the zero-based offsets in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1315 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | AccessorTy | acc, |
sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1352 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
sycl::ext::intel::esimd::simd< Toffset, N > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
USM pointer scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm
Scatters elements to specific address.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the number of channels (platform dependent). |
p | is the base pointer. |
offsets | is the zero-based offsets in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1261 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > | offsets, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1274 of file memory.hpp.
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::experimental::esimd::lsc_scatter | ( | T * | p, |
Toffset | offset, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
Definition at line 1285 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd< T, N > | src1, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand (expected value). |
src1 | is the second atomic operand (new value). |
pred | is predicates. |
Definition at line 2136 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N > | src0, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
src0 | is the first atomic operand. |
pred | is predicates. |
Definition at line 2112 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred | ||
) |
SLM atomic.
Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm
Op | is operation type. |
T | is element type. |
N | is the number of channels (platform dependent). |
DS | is the data size. |
offsets | is the zero-based offsets. |
pred | is predicates. |
Definition at line 2090 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred, | ||
sycl::ext::intel::esimd::simd< T, NElts > | pass_thru | ||
) |
Transposed SLM gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
offset | is the zero-based offset for SLM buffer in bytes. |
pred | is the predicate; if it contains 0, then the actual load is not performed and pass_thru is returned. |
pass_thru | contains the vector that is returned if the parameter pred contains 0. |
Definition at line 430 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::alignment.
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd_mask< 1 > | pred = 1 , |
||
FlagsT | flags = FlagsT{} |
||
) |
Transposed SLM gather with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size (unused/obsolete). |
offset | is the zero-based offset for SLM buffer in bytes. |
pred | is the predicate; if it contains 0, then the actual load is not performed and the returned value is undefined. |
Definition at line 403 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store | ( | uint32_t | offset, |
sycl::ext::intel::esimd::simd< T, NElts > | vals, | ||
FlagsT | flags = FlagsT{} |
||
) |
Transposed SLM scatter with 1 channel.
Supported platforms: DG2, PVC VISA instruction: lsc_store.slm
Scatters elements located to slm.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size (unused/obsolete). |
offset | is the zero-based offset for SLM buffer in bytes. |
vals | is values to store. |
Definition at line 1230 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred, | ||
sycl::ext::intel::esimd::simd< T, N *NElts > | pass_thru | ||
) |
SLM gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
pred | is predicates. |
pass_thru | values copied to the result when the corresponding element of pred is zero.. |
Definition at line 379 of file memory.hpp.
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
SLM gather.
Supported platforms: DG2, PVC VISA instruction: lsc_load.slm
Collects elements located at slm and returns them as a single simd object.
T | is element type. |
NElts | is the number of elements to load per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
pred | is predicates. |
Definition at line 353 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter | ( | sycl::ext::intel::esimd::simd< uint32_t, N > | offsets, |
sycl::ext::intel::esimd::simd< T, N *NElts > | vals, | ||
sycl::ext::intel::esimd::simd_mask< N > | pred = 1 |
||
) |
SLM scatter.
Supported platforms: DG2, PVC VISA instruction: lsc_store.slm
Scatters elements located to slm.
T | is element type. |
NElts | is the number of elements to store per address. |
DS | is the data size. |
N | is the number of channels (platform dependent). |
offsets | is the zero-based offsets for SLM buffer in bytes. |
vals | is values to store. |
pred | is predicates. |
Definition at line 1210 of file memory.hpp.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d | ( | config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > & | payload, |
sycl::ext::intel::esimd::simd< T, N > | Data | ||
) |
A variation of 2D
stateless block store with
parameters passed as config_2d_mem_access
object
Note: Compatibility with future hardware versions is not guaranteed.
Note: No software mitigation for hardware bugs is possible for this function.
T | is the element data type |
BlockWidth | the block width in number of elements |
BlockHeight | block height in number of elements |
NBlocks | Number of blocks |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
payload | is config_2d_mem_access object holding all the data |
Data | is the data to be stored. |
Definition at line 2034 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::check_cache_hints().
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d | ( | T * | Ptr, |
unsigned | SurfaceWidth, | ||
unsigned | SurfaceHeight, | ||
unsigned | SurfacePitch, | ||
int | X, | ||
int | Y, | ||
sycl::ext::intel::esimd::simd< T, N > | Vals | ||
) |
2D USM pointer block store.
Supported platforms: PVC VISA instruction: lsc_store_block2d.ugm
Stores elements at specified address.
T | is element type. |
BlockWidth | is the block width in number of elements. |
BlockHeight | is the block height in number of elements. |
L1H | is L1 cache hint. |
L2H | is L2 cache hint. |
N | is the data size |
Ptr | is the surface base address for this operation. |
SurfaceWidth | is the surface width minus 1 in bytes |
SurfaceHeight | is the surface height minus 1 in rows |
SurfacePitch | is the surface pitch minus 1 in bytes |
X | is zero based X-coordinate of the left upper rectangle corner in number of elements. |
Y | is zero based Y-coordinate of the left upper rectangle corner in rows. |
Vals | is a vector to store of type T and size N, where N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks |
Definition at line 1655 of file memory.hpp.