DPC++ Runtime
Runtime libraries for oneAPI DPC++
LSC-specific memory access APIs.

This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM. More...

Collaboration diagram for LSC-specific memory access APIs.:

Classes

class  sycl::_V1::ext::intel::experimental::esimd::config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks >
 Container class to hold parameters for load2d/store2d functions More...
 

Enumerations

enum class  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op : uint8_t {
  sycl::_V1::ext::intel::esimd::native::lsc::inc = 0x08 , sycl::_V1::ext::intel::esimd::native::lsc::dec = 0x09 , sycl::_V1::ext::intel::esimd::native::lsc::load = 0x0a , sycl::_V1::ext::intel::esimd::native::lsc::store = 0x0b ,
  sycl::_V1::ext::intel::esimd::native::lsc::add = 0x0c , sycl::_V1::ext::intel::esimd::native::lsc::sub = 0x0d , sycl::_V1::ext::intel::esimd::native::lsc::smin = 0x0e , sycl::_V1::ext::intel::esimd::native::lsc::smax = 0x0f ,
  sycl::_V1::ext::intel::esimd::native::lsc::umin = 0x10 , sycl::_V1::ext::intel::esimd::native::lsc::umax = 0x11 , sycl::_V1::ext::intel::esimd::native::lsc::cmpxchg = 0x12 , sycl::_V1::ext::intel::esimd::native::lsc::fadd = 0x13 ,
  sycl::_V1::ext::intel::esimd::native::lsc::fsub = 0x14 , sycl::_V1::ext::intel::esimd::native::lsc::fmin = 0x15 , sycl::_V1::ext::intel::esimd::native::lsc::fmax = 0x16 , sycl::_V1::ext::intel::esimd::native::lsc::fcmpxchg = 0x17 ,
  sycl::_V1::ext::intel::esimd::native::lsc::bit_and = 0x18 , sycl::_V1::ext::intel::esimd::native::lsc::bit_or = 0x19 , sycl::_V1::ext::intel::esimd::native::lsc::bit_xor = 0x1a
}
 LSC atomic operation codes. More...
 

Functions

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 SLM gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 SLM gather. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 Transposed SLM gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru)
 Transposed SLM gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 USM pointer gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 Accessor-based gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > pass_thru)
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
 USM pointer transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, FlagsT)
 A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT={})
 USM pointer transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 Accessor-based transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, FlagsT flags)
 A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, FlagsT flags)
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT={})
 Accessor-based transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > pass_thru, FlagsT flags=FlagsT{})
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer prefetch gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, FlagsT={})
 USM pointer prefetch transposed gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based prefetch gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, FlagsT flags=FlagsT{})
 Accessor-based transposed prefetch gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 SLM scatter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store (uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
 Transposed SLM scatter with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer scatter. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based scatter. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
 USM pointer transposed scatter with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags)
 A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
 Accessor-based transposed scatter with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags)
 A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
 2D USM pointer block load. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
 2D USM pointer block prefetch. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
 2D USM pointer block store. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload)
 A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload)
 A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload, sycl::ext::intel::esimd::simd< T, N > Data)
 A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==1 &&((Op !=sycl::ext::intel::esimd::atomic_op::store &&Op !=sycl::ext::intel::esimd::atomic_op::xchg)||N==1), sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< Op >)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read > &&(Op==sycl::ext::intel::esimd::atomic_op::load||__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write >), sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_device_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_device_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_rw_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 

Detailed Description

This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM.

Enumeration Type Documentation

◆ atomic_op

enum sycl::_V1::ext::intel::esimd::native::lsc::atomic_op : uint8_t
strong

LSC atomic operation codes.

atomic_update<native::lsc::atomic_op::inc>(...); is a short-cut to lsc_atomic_update<atomic_op::inc>(...); with default cache and data size controls.

Enumerator
inc 
dec 
load 
store 
add 
sub 
smin 
smax 
umin 
umax 
cmpxchg 
fadd 
fsub 
fmin 
fmax 
fcmpxchg 
bit_and 
bit_or 
bit_xor 

Definition at line 39 of file common.hpp.

Function Documentation

◆ lsc_atomic_update() [1/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2442 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ lsc_atomic_update() [2/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2382 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_atomic_update() [3/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && (Op == sycl::ext::intel::esimd::atomic_op::load || __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2324 of file memory.hpp.

◆ lsc_atomic_update() [4/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2473 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ lsc_atomic_update() [5/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2411 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_atomic_update() [6/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2352 of file memory.hpp.

◆ lsc_atomic_update() [7/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predpredicates.

Definition at line 2261 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ lsc_atomic_update() [8/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.

Definition at line 2203 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_atomic_update() [9/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
predis predicates.

Definition at line 2163 of file memory.hpp.

◆ lsc_atomic_update() [10/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

◆ lsc_atomic_update() [11/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename OffsetObjT , typename RegionTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 2216 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_atomic_update() [12/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

◆ lsc_atomic_update() [13/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 1 && ((Op != sycl::ext::intel::esimd::atomic_op::store && Op != sycl::ext::intel::esimd::atomic_op::xchg) || N == 1), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 2233 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_atomic_update() [14/14]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<Op>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 2177 of file memory.hpp.

◆ lsc_block_load() [1/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
FlagsT  flags 
)

A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.

Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 926 of file memory.hpp.

◆ lsc_block_load() [2/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  pass_thru,
FlagsT  = {} 
)

Accessor-based transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
FlagsTis the alignment specifier type tag.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
predis operation predicate. Operation is skipped for index 'i' if pred[i] == 0 and the result element is taken from pass_thru[i]. Otherwise, the operation is performed.
pass_thrucontains the values copied to the result when the corresponding element from pred is zero.
Returns
is a vector of type T and size NElts

Definition at line 987 of file memory.hpp.

◆ lsc_block_load() [3/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

Accessor-based transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object. When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
FlagsTis the alignment specifier type tag.
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 867 of file memory.hpp.

◆ lsc_block_load() [4/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
FlagsT  flags 
)

Definition at line 941 of file memory.hpp.

◆ lsc_block_load() [5/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  pass_thru,
FlagsT  flags = FlagsT{} 
)

Definition at line 1005 of file memory.hpp.

◆ lsc_block_load() [6/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

Definition at line 884 of file memory.hpp.

◆ lsc_block_load() [7/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T *  p,
FlagsT   
)

A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
FlagsTis the alignment specifier type tag.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
Parameters
pis the base pointer.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 775 of file memory.hpp.

◆ lsc_block_load() [8/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T *  p,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  pass_thru,
FlagsT  = {} 
)

USM pointer transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
FlagsTis the alignment specifier type tag.
Parameters
pis the base pointer.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed.
pass_thrucontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
Returns
is a vector of type T and size NElts.

Definition at line 819 of file memory.hpp.

◆ lsc_block_load() [9/9]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T *  p,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  = {} 
)

USM pointer transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
Flagsis the alignment specifier type tag.
Parameters
pis the base pointer.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 734 of file memory.hpp.

◆ lsc_block_store() [1/5]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags 
)

A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.

Scatters elements to surface. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
valsis values to store.
flagsis the alignment specifier type tag.

Definition at line 1542 of file memory.hpp.

◆ lsc_block_store() [2/5]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  = {} 
)

Accessor-based transposed scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to surface. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
Flagsis the alignment specifier type tag.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
valsis values to store.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.

Definition at line 1481 of file memory.hpp.

◆ lsc_block_store() [3/5]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags = FlagsT{} 
)

Definition at line 1498 of file memory.hpp.

◆ lsc_block_store() [4/5]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( T *  p,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags 
)

A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.

Scatters elements to specific address. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Parameters
pis the base pointer.
valsis values to store.
flagsis the alignment specifier type tag.

Definition at line 1434 of file memory.hpp.

◆ lsc_block_store() [5/5]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( T *  p,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  = {} 
)

USM pointer transposed scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to specific address. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size (unused/obsolete).
L1His L1 cache hint.
L2His L2 cache hint.
Flagsis the alignment specifier type tag.
Parameters
pis the base pointer.
valsis values to store.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.

Definition at line 1395 of file memory.hpp.

◆ lsc_gather() [1/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

Accessor-based gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.
pass_thrucontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
Returns
is a vector of type T and size N * NElts

Definition at line 647 of file memory.hpp.

◆ lsc_gather() [2/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 573 of file memory.hpp.

◆ lsc_gather() [3/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

Definition at line 687 of file memory.hpp.

◆ lsc_gather() [4/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 612 of file memory.hpp.

◆ lsc_gather() [5/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

USM pointer gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.
pass_thrucontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
Returns
is a vector of type T and size N * NElts

Definition at line 493 of file memory.hpp.

◆ lsc_gather() [6/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 460 of file memory.hpp.

◆ lsc_gather() [7/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

Definition at line 516 of file memory.hpp.

◆ lsc_gather() [8/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 506 of file memory.hpp.

◆ lsc_gather() [9/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

Definition at line 540 of file memory.hpp.

◆ lsc_gather() [10/10]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 529 of file memory.hpp.

◆ lsc_load_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload)

A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data
Returns
is a vector of type T and size N, where N is getNextPowerOf2(Height) * Width * NBlocks, if transposed getNextPowerOf2(Width) * Height * NBlocks, otherwise

Definition at line 1892 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::check_cache_hints().

◆ lsc_load_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d ( const T *  Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y 
)

2D USM pointer block load.

Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
NBlocksis the number of blocks.
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.
Returns
is a vector of type T and size N, where N is BlockWidth * BlockHeight * NBlocks, if transformed; otherwise, N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks

Definition at line 1584 of file memory.hpp.

◆ lsc_prefetch() [1/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag, typename AccessorTy >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( AccessorTy  acc,
__ESIMD_DNS::DeviceAccessorOffsetT  offset,
FlagsT  flags = FlagsT{} 
)

Accessor-based transposed prefetch gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at surface of NElts * S bytes starting from given offset, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
FlagsTis the alignment specifier type tag.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.

Definition at line 1181 of file memory.hpp.

◆ lsc_prefetch() [2/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based prefetch gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at surface.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.

Definition at line 1117 of file memory.hpp.

◆ lsc_prefetch() [3/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T *  p,
FlagsT  = {} 
)

USM pointer prefetch transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of prefetched block is 512 bytes for PVC and 256 bytes for ACM (DG2). When sizeof(T) equal to 8 the address must be 8-byte aligned. Also, 8-bytes alignment is required when the function has to load more than 256-bytes. In all other cases 4-byte alignment is required. When T is 1- or 2-byte type the data is treated as 4-byte data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
FlagsTis the alignment specifier type tag.
Parameters
pis the base pointer.

Definition at line 1087 of file memory.hpp.

◆ lsc_prefetch() [4/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer prefetch gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at specified address.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.

Definition at line 1031 of file memory.hpp.

◆ lsc_prefetch() [5/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1042 of file memory.hpp.

◆ lsc_prefetch() [6/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1052 of file memory.hpp.

◆ lsc_prefetch_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload)

A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data

Definition at line 1988 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::check_cache_hints(), and sycl::_V1::ext::intel::esimd::prefetch().

◆ lsc_prefetch_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d ( const T *  Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y 
)

2D USM pointer block prefetch.

Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm

Prefetches elements located at specified address.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
NBlocksis the number of blocks.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.

Definition at line 1618 of file memory.hpp.

◆ lsc_scatter() [1/5]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< __ESIMD_DNS::DeviceAccessorOffsetT, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to surface.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
valsis values to store.
predis predicates.

Definition at line 1315 of file memory.hpp.

◆ lsc_scatter() [2/5]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v< AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1352 of file memory.hpp.

◆ lsc_scatter() [3/5]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T *  p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to specific address.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
valsis values to store.
predis predicates.

Definition at line 1261 of file memory.hpp.

◆ lsc_scatter() [4/5]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename OffsetObjT , typename RegionTy >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T *  p,
sycl::ext::intel::esimd::simd_view< OffsetObjT, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1274 of file memory.hpp.

◆ lsc_scatter() [5/5]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T *  p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1285 of file memory.hpp.

◆ lsc_slm_atomic_update() [1/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2136 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ lsc_slm_atomic_update() [2/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2112 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ lsc_slm_atomic_update() [3/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 2090 of file memory.hpp.

◆ lsc_slm_block_load() [1/2]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load ( uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  pass_thru 
)

Transposed SLM gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
predis the predicate; if it contains 0, then the actual load is not performed and pass_thru is returned.
pass_thrucontains the vector that is returned if the parameter pred contains 0.
Returns
is a vector of type T and size NElts.

Definition at line 430 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::alignment.

◆ lsc_slm_block_load() [2/2]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load ( uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

Transposed SLM gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size (unused/obsolete).
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
predis the predicate; if it contains 0, then the actual load is not performed and the returned value is undefined.
Returns
is a vector of type T and size NElts

Definition at line 403 of file memory.hpp.

◆ lsc_slm_block_store()

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store ( uint32_t  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags = FlagsT{} 
)

Transposed SLM scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.slm

Scatters elements located to slm.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size (unused/obsolete).
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
valsis values to store.

Definition at line 1230 of file memory.hpp.

◆ lsc_slm_gather() [1/2]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  pass_thru 
)

SLM gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
predis predicates.
pass_thruvalues copied to the result when the corresponding element of pred is zero..
Returns
is a vector of type T and size N * NElts

Definition at line 379 of file memory.hpp.

◆ lsc_slm_gather() [2/2]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

SLM gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 353 of file memory.hpp.

◆ lsc_slm_scatter()

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

SLM scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.slm

Scatters elements located to slm.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
valsis values to store.
predis predicates.

Definition at line 1210 of file memory.hpp.

◆ lsc_store_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload,
sycl::ext::intel::esimd::simd< T, N >  Data 
)

A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data
Datais the data to be stored.

Definition at line 2034 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::check_cache_hints().

◆ lsc_store_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d ( T *  Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y,
sycl::ext::intel::esimd::simd< T, N >  Vals 
)

2D USM pointer block store.

Supported platforms: PVC VISA instruction: lsc_store_block2d.ugm

Stores elements at specified address.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
L1His L1 cache hint.
L2His L2 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.
Valsis a vector to store of type T and size N, where N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks

Definition at line 1655 of file memory.hpp.