DPC++ Runtime
Runtime libraries for oneAPI DPC++
LSC-specific memory access APIs.

This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM. More...

Collaboration diagram for LSC-specific memory access APIs.:

Namespaces

 sycl::_V1::ext::intel::experimental::esimd::detail
 

Classes

class  sycl::_V1::ext::intel::experimental::esimd::config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks >
 Container class to hold parameters for load2d/store2d functions More...
 

Enumerations

enum  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op : uint8_t {
  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::inc = 0x08, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::dec = 0x09, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::load = 0x0a, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::store = 0x0b,
  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::add = 0x0c, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::sub = 0x0d, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::smin = 0x0e, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::smax = 0x0f,
  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::umin = 0x10, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::umax = 0x11, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::cmpxchg = 0x12, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::fadd = 0x13,
  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::fsub = 0x14, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::fmin = 0x15, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::fmax = 0x16, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::fcmpxchg = 0x17,
  sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::bit_and = 0x18, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::bit_or = 0x19, sycl::_V1::ext::intel::esimd::native::lsc::atomic_op::bit_xor = 0x1a
}
 LSC atomic operation codes. More...
 

Functions

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 SLM gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values)
 SLM gather. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1)
 Transposed SLM gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load (uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values)
 Transposed SLM gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values)
 USM pointer gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N *NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_gather (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred, sycl::ext::intel::esimd::simd< T, N *NElts > old_values)
 Accessor-based gather. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 USM pointer transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, FlagsT flags)
 A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values, FlagsT flags=FlagsT{})
 USM pointer transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 Accessor-based transposed gather with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<!std::is_pointer< AccessorTy >::value &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, FlagsT flags)
 A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred, sycl::ext::intel::esimd::simd< T, NElts > old_values, FlagsT flags=FlagsT{})
 Accessor-based transposed gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer prefetch gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (const T *p)
 USM pointer prefetch transposed gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based prefetch gather. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch (AccessorTy acc, uint32_t offset)
 Accessor-based transposed prefetch gather with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 SLM scatter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store (uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals)
 Transposed SLM scatter with 1 channel. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 USM pointer scatter. More...
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&N==1 > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Accessor-based scatter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 USM pointer transposed scatter with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags)
 A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer< AccessorTy >::value &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
 Accessor-based transposed scatter with 1 channel. More...
 
template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy > &&sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store (AccessorTy acc, uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
 2D USM pointer block load. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
 2D USM pointer block prefetch. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
 2D USM pointer block store. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload)
 A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload)
 A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d (config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &payload, sycl::ext::intel::esimd::simd< T, N > Data)
 A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update (sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 SLM atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==0, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==1 &&((Op !=sycl::ext::intel::esimd::atomic_op::store &&Op !=sycl::ext::intel::esimd::atomic_op::xchg)||N==1), sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 USM pointer atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, sycl::ext::intel::esimd::simd_view< Toffset, RegionTy > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v< Toffset > &&__ESIMD_DNS::get_num_args< __ESIMD_DNS::to_lsc_atomic_op< Op >)>)==2, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (T *p, Toffset offset, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred=1)
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v< AccessorTy > &&!sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 Accessor-based atomic. More...
 
template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v< AccessorTy >, sycl::ext::intel::esimd::simd< T, N > > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update (AccessorTy acc, sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd_mask< N > pred)
 Variant of lsc_atomic_update that uses local_accessor as a parameter. More...
 
template<lsc_memory_kind Kind = lsc_memory_kind::untyped_global, lsc_fence_op FenceOp = lsc_fence_op::none, lsc_scope Scope = lsc_scope::group, int N = 16>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_fence (sycl::ext::intel::esimd::simd_mask< N > pred=1)
 Memory fence. More...
 

Detailed Description

This group combines types and functions specific to LSC, which is available in Intel GPUs starting from PVC and ACM.

Enumeration Type Documentation

◆ atomic_op

enum sycl::_V1::ext::intel::esimd::native::lsc::atomic_op : uint8_t
strong

LSC atomic operation codes.

atomic_update<native::lsc::atomic_op::inc>(...); is a short-cut to lsc_atomic_update<atomic_op::inc>(...); with default cache and data size controls.

Enumerator
inc 
dec 
load 
store 
add 
sub 
smin 
smax 
umin 
umax 
cmpxchg 
fadd 
fsub 
fmin 
fmax 
fcmpxchg 
bit_and 
bit_or 
bit_xor 

Definition at line 39 of file common.hpp.

Function Documentation

◆ lsc_atomic_update() [1/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3575 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.

◆ lsc_atomic_update() [2/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3486 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_atomic_update() [3/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename Toffset >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_accessor_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Accessor-based atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3400 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_atomic_update() [4/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3633 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.

◆ lsc_atomic_update() [5/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3542 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_atomic_update() [6/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

Variant of lsc_atomic_update that uses local_accessor as a parameter.

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3454 of file memory.hpp.

◆ lsc_atomic_update() [7/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predpredicates.

Definition at line 3315 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.

◆ lsc_atomic_update() [8/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.

Definition at line 3233 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_atomic_update() [9/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

USM pointer atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.ugm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
offsetsis the zero-based offsets.
predis predicates.

Definition at line 3158 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_atomic_update() [10/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

◆ lsc_atomic_update() [11/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 3269 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_atomic_update() [12/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 3193 of file memory.hpp.

◆ lsc_atomic_update() [13/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 2, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

◆ lsc_atomic_update() [14/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 1 && ((Op != sycl::ext::intel::esimd::atomic_op::store && Op != sycl::ext::intel::esimd::atomic_op::xchg) || N == 1), sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 3287 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_atomic_update() [15/15]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename Toffset >
__ESIMD_API std::enable_if_t< std::is_integral_v<Toffset> && __ESIMD_DNS::get_num_args<__ESIMD_DNS::to_lsc_atomic_op<Op>)>) == 0, sycl::ext::intel::esimd::simd<T, N> > sycl::_V1::ext::intel::experimental::esimd::lsc_atomic_update ( T p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 3206 of file memory.hpp.

◆ lsc_block_load() [1/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<!std::is_pointer<AccessorTy>::value && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
FlagsT  flags 
)

A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.

Collects elements located at surface and returns them as a single simd object. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 1448 of file memory.hpp.

◆ lsc_block_load() [2/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  old_values,
FlagsT  flags = FlagsT{} 
)

Accessor-based transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
predis operation predicate. Operation is skipped for index 'i' if pred[i] == 0 and the result element is taken from old_values[i]. Otherwise, the operation is performed.
old_valuescontains the values copied to the result when the corresponding element from pred is zero.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts

Definition at line 1505 of file memory.hpp.

◆ lsc_block_load() [3/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

Accessor-based transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 1325 of file memory.hpp.

◆ lsc_block_load() [4/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T p,
FlagsT  flags 
)

A variation of lsc_block_load without predicate parameter to simplify use of alignment parameter.

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 1171 of file memory.hpp.

◆ lsc_block_load() [5/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T p,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  old_values,
FlagsT  flags = FlagsT{} 
)

USM pointer transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed.
old_valuescontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts.

Definition at line 1217 of file memory.hpp.

◆ lsc_block_load() [6/6]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT>, sycl::ext::intel::esimd::simd<T, NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_load ( const T p,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

USM pointer transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Accesses contiguous block of memory of NElts * S bytes starting from given address, where S is a byte size of an "element" defined by the DS template parameter. The maximum size of accessed block is 512 bytes for PVC and 256 bytes for ACM (DG2). When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
flagsis the alignment specifier type tag.
Returns
is a vector of type T and size NElts. The elements of the returned vector for which the corresponding element in pred is 0 are undefined.

Definition at line 1071 of file memory.hpp.

◆ lsc_block_store() [1/4]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags = FlagsT{} 
)

Definition at line 2288 of file memory.hpp.

◆ lsc_block_store() [2/4]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy , typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> && sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( AccessorTy  acc,
uint32_t  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

Accessor-based transposed scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to surface. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.
valsis values to store.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
flagsis the alignment specifier type tag.

Definition at line 2208 of file memory.hpp.

◆ lsc_block_store() [3/4]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( T p,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
FlagsT  flags 
)

A variation of lsc_block_store without predicate parameter to simplify use of alignment parameter.

Scatters elements to specific address. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
valsis values to store.
flagsis the alignment specifier type tag.

Definition at line 2161 of file memory.hpp.

◆ lsc_block_store() [4/4]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
__ESIMD_API std::enable_if_t<sycl::ext::intel::esimd::is_simd_flag_type_v<FlagsT> > sycl::_V1::ext::intel::experimental::esimd::lsc_block_store ( T p,
sycl::ext::intel::esimd::simd< T, NElts >  vals,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1,
FlagsT  flags = FlagsT{} 
)

USM pointer transposed scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to specific address. When DS equals lsc_data_size::u64 or sizeof(T) equal to 8 the address must be 8-byte aligned, otherwise - 4-bytes aligned. Allowed values for the data size are lsc_data_size::u32, lsc_data_size::u64, lsc_data_size::u8, lsc_data_size::u16. When data size is either lsc_data_size::u8 or lsc_data_size::u16 the data is treated as 32 bit data. Allowed NElts values for 64 bit data are 1, 2, 3, 4, 8, 16, 32, 64. Allowed NElts values for 32 bit data are 1, 2, 3, 4, 8, 16, 32, 64, 128. Allowed NElts values for 16 bit data are 2, 4, 8, 16, 32, 64, 128, 256. Allowed NElts values for 8 bit data are 4, 8, 12, 16, 32, 64, 128, 256, 512. 8 bytes alignment is required for 64 bit data, 32 bit data and NElts equal to 128, 16 bit data and NElts equal to 256, 8 bit data and NElts equal to 512. Otherwise 4 bytes alignment is required.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.
valsis values to store.
predis operation predicate. Zero means operation is skipped entirely, non-zero - operation is performed. The default is '1' - perform the operation.
flagsis the alignment specifier type tag.

Definition at line 2069 of file memory.hpp.

◆ lsc_fence()

template<lsc_memory_kind Kind = lsc_memory_kind::untyped_global, lsc_fence_op FenceOp = lsc_fence_op::none, lsc_scope Scope = lsc_scope::group, int N = 16>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_fence ( sycl::ext::intel::esimd::simd_mask< N >  pred = 1)

Memory fence.

Supported platforms: DG2, PVC

Template Parameters
Kindis the Sfid shaded function.
FenceOpis the fence operation.
Scopeis the operation scope.
Nis the number of channels (platform dependent).
Parameters
predis predicates.

Definition at line 3651 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::group, sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::shared_local.

◆ lsc_gather() [1/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  old_values 
)

Accessor-based gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.
old_valuescontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
Returns
is a vector of type T and size N * NElts

Definition at line 962 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_gather() [2/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer_v<AccessorTy> && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at surface and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 873 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_gather() [3/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  old_values 
)

USM pointer gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.
old_valuescontains the vector which elements are copied to the returned result when the corresponding element of pred is 0.
Returns
is a vector of type T and size N * NElts

Definition at line 776 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_gather() [4/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 729 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_gather() [5/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  old_values 
)

Definition at line 816 of file memory.hpp.

◆ lsc_gather() [6/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 806 of file memory.hpp.

◆ lsc_gather() [7/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  old_values 
)

Definition at line 840 of file memory.hpp.

◆ lsc_gather() [8/8]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>, sycl::ext::intel::esimd::simd<T, N * NElts> > sycl::_V1::ext::intel::experimental::esimd::lsc_gather ( const T p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 829 of file memory.hpp.

◆ lsc_load_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload)

A variation of 2D stateless block load with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data
Returns
is a vector of type T and size N, where N is getNextPowerOf2(Height) * Width * NBlocks, if transposed getNextPowerOf2(Width) * Height * NBlocks, otherwise

Definition at line 2856 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::load.

◆ lsc_load_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_load_2d ( const T Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y 
)

2D USM pointer block load.

Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm

Collects elements located at specified address and returns them as a single simd object.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
NBlocksis the number of blocks.
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.
Returns
is a vector of type T and size N, where N is BlockWidth * BlockHeight * NBlocks, if transformed; otherwise, N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks

Definition at line 2442 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::load, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::transpose.

◆ lsc_prefetch() [1/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based prefetch gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at surface.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
predis predicates.

Definition at line 1712 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_prefetch() [2/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( AccessorTy  acc,
uint32_t  offset 
)

Accessor-based transposed prefetch gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at surface.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetis the zero-based offset in bytes.

Definition at line 1778 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.

◆ lsc_prefetch() [3/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T p)

USM pointer prefetch transposed gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at specified address.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Parameters
pis the base pointer.

Definition at line 1665 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.

◆ lsc_prefetch() [4/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer prefetch gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.ugm

Prefetches elements located at specified address.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
predis predicates.

Definition at line 1608 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_prefetch() [5/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1633 of file memory.hpp.

◆ lsc_prefetch() [6/6]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> > sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch ( const T p,
Toffset  offset,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1644 of file memory.hpp.

◆ lsc_prefetch_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false, bool Transformed = false, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload)

A variation of 2D stateless block prefetch with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
Transposedis the transposed version or not.
Transformedis apply VNNI transform or not.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data

Definition at line 2941 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::prefetch.

◆ lsc_prefetch_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_prefetch_2d ( const T Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y 
)

2D USM pointer block prefetch.

Supported platforms: PVC VISA instruction: lsc_load_block2d.ugm

Prefetches elements located at specified address.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
NBlocksis the number of blocks.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.

Definition at line 2549 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::prefetch.

◆ lsc_scatter() [1/4]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename AccessorTy >
__ESIMD_API std::enable_if_t< !std::is_pointer<AccessorTy>::value && !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy> > sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( AccessorTy  acc,
sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Accessor-based scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to surface.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
AccessorTyis the sycl::accessor type.
Parameters
accis the SYCL accessor.
offsetsis the zero-based offsets in bytes.
valsis values to store.
predis predicates.

Definition at line 1971 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::esimd::get_surface_index(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_scatter() [2/4]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T p,
sycl::ext::intel::esimd::simd< Toffset, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

USM pointer scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.ugm

Scatters elements to specific address.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the number of channels (platform dependent).
Parameters
pis the base pointer.
offsetsis the zero-based offsets in bytes.
valsis values to store.
predis predicates.

Definition at line 1900 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_scatter() [3/4]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset , typename RegionTy = sycl::ext::intel::esimd::region1d_t<Toffset, N, 1>>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T p,
sycl::ext::intel::esimd::simd_view< Toffset, RegionTy >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1928 of file memory.hpp.

◆ lsc_scatter() [4/4]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N, typename Toffset >
__ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1> sycl::_V1::ext::intel::experimental::esimd::lsc_scatter ( T p,
Toffset  offset,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

Definition at line 1940 of file memory.hpp.

◆ lsc_slm_atomic_update() [1/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd< T, N >  src1,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
src0is the first atomic operand (expected value).
src1is the second atomic operand (new value).
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3108 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, sycl::_V1::ext::intel::experimental::esimd::src0, and sycl::_V1::ext::intel::experimental::esimd::src1.

◆ lsc_slm_atomic_update() [2/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N >  src0,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
src0is the first atomic operand.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3061 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::src0.

◆ lsc_slm_atomic_update() [3/3]

template<sycl::ext::intel::esimd::atomic_op Op, typename T , int N, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_atomic_update ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred 
)

SLM atomic.

Supported platforms: DG2, PVC VISA instruction: lsc_atomic_<OP>.slm

Template Parameters
Opis operation type.
Tis element type.
Nis the number of channels (platform dependent).
DSis the data size.
Parameters
offsetsis the zero-based offsets.
predis predicates.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 3021 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_slm_block_load() [1/2]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load ( uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred,
sycl::ext::intel::esimd::simd< T, NElts >  old_values 
)

Transposed SLM gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
predis the predicate; if it contains 0, then the actual load is not performed and old_values is returned.
old_valuescontains the vector that is returned if the parameter pred contains 0.
Returns
is a vector of type T and size NElts.

Definition at line 687 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.

◆ lsc_slm_block_load() [2/2]

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API sycl::ext::intel::esimd::simd<T, NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_load ( uint32_t  offset,
sycl::ext::intel::esimd::simd_mask< 1 >  pred = 1 
)

Transposed SLM gather with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
predis the predicate; if it contains 0, then the actual load is not performed and the returned value is undefined.
Returns
is a vector of type T and size NElts

Definition at line 650 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.

◆ lsc_slm_block_store()

template<typename T , int NElts, lsc_data_size DS = lsc_data_size::default_size>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_block_store ( uint32_t  offset,
sycl::ext::intel::esimd::simd< T, NElts >  vals 
)

Transposed SLM scatter with 1 channel.

Supported platforms: DG2, PVC VISA instruction: lsc_store.slm

Scatters elements located to slm.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
Parameters
offsetis the zero-based offset for SLM buffer in bytes.
valsis values to store.

Definition at line 1859 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::none, sycl::_V1::ext::intel::experimental::esimd::detail::transpose, sycl::_V1::ext::intel::experimental::esimd::u32, and sycl::_V1::ext::intel::experimental::esimd::u64.

◆ lsc_slm_gather() [1/2]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred,
sycl::ext::intel::esimd::simd< T, N *NElts >  old_values 
)

SLM gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
predis predicates.
old_valuesvalues copied to the result when the corresponding element of pred is zero..
Returns
is a vector of type T and size N * NElts

Definition at line 610 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_slm_gather() [2/2]

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API sycl::ext::intel::esimd::simd<T, N * NElts> sycl::_V1::ext::intel::experimental::esimd::lsc_slm_gather ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

SLM gather.

Supported platforms: DG2, PVC VISA instruction: lsc_load.slm

Collects elements located at slm and returns them as a single simd object.

Template Parameters
Tis element type.
NEltsis the number of elements to load per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
predis predicates.
Returns
is a vector of type T and size N * NElts

Definition at line 572 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_slm_scatter()

template<typename T , int NElts = 1, lsc_data_size DS = lsc_data_size::default_size, int N>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_slm_scatter ( sycl::ext::intel::esimd::simd< uint32_t, N >  offsets,
sycl::ext::intel::esimd::simd< T, N *NElts >  vals,
sycl::ext::intel::esimd::simd_mask< N >  pred = 1 
)

SLM scatter.

Supported platforms: DG2, PVC VISA instruction: lsc_store.slm

Scatters elements located to slm.

Template Parameters
Tis element type.
NEltsis the number of elements to store per address.
DSis the data size.
Nis the number of channels (platform dependent).
Parameters
offsetsis the zero-based offsets for SLM buffer in bytes.
valsis values to store.
predis predicates.

Definition at line 1826 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size(), sycl::_V1::ext::intel::experimental::esimd::none, and sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose.

◆ lsc_store_2d() [1/2]

template<typename T , int BlockWidth, int BlockHeight = 1, int NBlocks = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, NBlocks, BlockHeight, BlockWidth, false, false>()>
ESIMD_INLINE SYCL_ESIMD_FUNCTION void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d ( config_2d_mem_access< T, BlockWidth, BlockHeight, NBlocks > &  payload,
sycl::ext::intel::esimd::simd< T, N >  Data 
)

A variation of 2D stateless block store with parameters passed as config_2d_mem_access object Note: Compatibility with future hardware versions is not guaranteed.

Note: No software mitigation for hardware bugs is possible for this function.

Template Parameters
Tis the element data type
BlockWidththe block width in number of elements
BlockHeightblock height in number of elements
NBlocksNumber of blocks
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
payloadis config_2d_mem_access object holding all the data
Datais the data to be stored.

Definition at line 2983 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), and sycl::_V1::ext::intel::experimental::esimd::detail::store.

◆ lsc_store_2d() [2/2]

template<typename T , int BlockWidth, int BlockHeight = 1, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N = detail::get_lsc_block_2d_data_size< T, 1u, BlockHeight, BlockWidth, false, false>()>
__ESIMD_API void sycl::_V1::ext::intel::experimental::esimd::lsc_store_2d ( T Ptr,
unsigned  SurfaceWidth,
unsigned  SurfaceHeight,
unsigned  SurfacePitch,
int  X,
int  Y,
sycl::ext::intel::esimd::simd< T, N >  Vals 
)

2D USM pointer block store.

Supported platforms: PVC VISA instruction: lsc_store_block2d.ugm

Stores elements at specified address.

Template Parameters
Tis element type.
BlockWidthis the block width in number of elements.
BlockHeightis the block height in number of elements.
L1His L1 cache hint.
L3His L3 cache hint.
Nis the data size
Parameters
Ptris the surface base address for this operation.
SurfaceWidthis the surface width minus 1 in bytes
SurfaceHeightis the surface height minus 1 in rows
SurfacePitchis the surface pitch minus 1 in bytes
Xis zero based X-coordinate of the left upper rectangle corner in number of elements.
Yis zero based Y-coordinate of the left upper rectangle corner in rows.
Valsis a vector to store of type T and size N, where N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) * getNextPowerOf2(BlockWidth) * NBlocks

Definition at line 2595 of file memory.hpp.

References sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_block_2d_restrictions(), sycl::_V1::ext::intel::experimental::esimd::detail::nontranspose, and sycl::_V1::ext::intel::experimental::esimd::detail::store.