DPC++ Runtime
Runtime libraries for oneAPI DPC++
Shared local memory access functions.
Collaboration diagram for Shared local memory access functions.:

Classes

class  sycl::_V1::ext::intel::esimd::slm_allocator< SLMAmount >
 RAII-style class used to implement "semi-dynamic" SLM allocation. More...
 

Functions

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ()
 Declare per-work-group slm size. More...
 
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init (uint32_t size)
 Declare per-work-group slm size. More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<int VS = 1, typename OffsetSimdViewT , typename PassThruSimdViewT , int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t<(detail::is_simd_view_type_v< OffsetSimdViewT > &&detail::is_simd_view_type_v< PassThruSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >), simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, PassThruSimdViewT pass_thru, PropertyListT props={})
 template <int VS = 1, typename OffsetSimdViewT, typename PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, PropertyListT props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<int VS = 1, typename PassThruSimdViewT , int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t<(detail::is_simd_view_type_v< PassThruSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >), simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, PassThruSimdViewT pass_thru, PropertyListT props={})
 template <int VS = 1, typename PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, PropertyListT props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
 simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, PropertyListT props={})
 simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset)
 Load a scalar value from the Shared Local Memory. More...
 
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) More...
 
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, PropertyListT props={})
 void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<int VS = 1, typename ValuesSimdViewT , typename OffsetSimdViewT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <int VS = 1, typename ValuesSimdViewT, typename OffsetSimdViewT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, simd_mask<N / VS> mask, PropertyListT props = {}); More...
 
template<int VS = 1, typename ValuesSimdViewT , typename OffsetSimdViewT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, PropertyListT props={})
 template <int VS = 1, typename ValuesSimdViewT, typename OffsetSimdViewT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, PropertyListT props = {}); More...
 
template<int VS = 1, typename ValuesSimdViewT , typename OffsetT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< OffsetT, N/VS > byte_offsets, ValuesSimdViewT vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <int VS = 1, typename ValuesSimdViewT, typename OffsetT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(simd<OffsetT, N / VS> byte_offsets, ValuesSimdViewT vals, simd_mask<N / VS> mask, PropertyListT props = {}); More...
 
template<int VS = 1, typename ValuesSimdViewT , typename OffsetT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< OffsetT, N/VS > byte_offsets, ValuesSimdViewT vals, PropertyListT props={})
 template <int VS = 1, typename ValuesSimdViewT, typename OffsetT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(simd<OffsetT, N / VS> byte_offsets, ValuesSimdViewT vals, PropertyListT props = {}); More...
 
template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val)
 Store a scalar value into the Shared Local Memory. More...
 
template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, Flags)
 Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, PropertyListT props={})
 Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< PassThruSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, simd_mask< 1 > pred, PassThruSimdViewT pass_thru, PropertyListT props={})
 simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset. More...
 
template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< PassThruSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, PassThruSimdViewT pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< PassThruSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, PassThruSimdViewT pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename T , int N, typename Flags >
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals, Flags)
 Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
 void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, ValuesSimdViewT vals, simd_mask< 1 > pred, PropertyListT props={})
 void slm_block_store(uint32_t byte_offset, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, ValuesSimdViewT vals, PropertyListT props={})
 void slm_block_store(uint32_t byte_offset, ValuesSimdViewT vals, props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, PropertyListT props={})
 void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={}); More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, ValuesSimdViewT vals, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, ValuesSimdViewT vals, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, ValuesSimdViewT vals, PropertyListT props={})
 void block_store(local_accessor lacc, ValuesSimdViewT vals, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, ValuesSimdViewT vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< ValuesSimdViewT > &&detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, ValuesSimdViewT vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
  More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset, and returns a vector of old values found at the memory locations before update. More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 One argument variant of the atomic update operation. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< SrcSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, SrcSimdViewT src0, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd_mask<N> mask = 1) More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< OffsetSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, simd_mask<N> mask = 1) More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< OffsetSimdViewT > &&detail::is_simd_view_type_v< SrcSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask<N> mask = 1) More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> mask = 1); // (lacc-au1-1) More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, simd_mask<1> mask = 1); More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< SrcSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, SrcSimdViewT src0, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd_mask<1> mask = 1); More...
 
template<atomic_op Op, typename SrcSimdViewT , typename OffsetSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask<1> mask = 1); More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 Two argument variant of the atomic update operation. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, SrcSimdViewT src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< OffsetSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, simd< T, N > src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); // (lacc-au2-1) More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, SrcSimdViewT src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, simd< T, N > src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 
template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&detail::is_simd_view_type_v< SrcSimdViewT > &&detail::is_simd_view_type_v< OffsetSimdViewT > &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters. More...
 

Detailed Description

Function Documentation

◆ atomic_update() [1/13]

template<atomic_op Op, typename OffsetSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8681 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [2/13]

template<atomic_op Op, typename OffsetSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, simd_mask<1> mask = 1);

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8232 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [3/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8729 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [4/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8705 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [5/13]

template<atomic_op Op, typename SrcSimdViewT , typename OffsetSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask<1> mask = 1);

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8300 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [6/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(), typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8757 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [7/13]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); // (lacc-au2-1)

Definition at line 8592 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [8/13]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> mask = 1); // (lacc-au1-1)

Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8202 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [9/13]

template<atomic_op Op, typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8634 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [10/13]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset, and returns a vector of old values found at the memory locations before update.

Definition at line 8020 of file memory.hpp.

◆ atomic_update() [11/13]

template<atomic_op Op, typename SrcSimdViewT , typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8612 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [12/13]

template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<SrcSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd_mask<1> mask = 1);

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8265 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [13/13]

template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Definition at line 8657 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ block_load() [1/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 7033 of file memory.hpp.

◆ block_load() [2/8]

template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<PassThruSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
simd_mask< 1 >  pred,
PassThruSimdViewT  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R2: The target device must be DG2, PVC or newer GPU.

Definition at line 7282 of file memory.hpp.

◆ block_load() [3/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7111 of file memory.hpp.

◆ block_load() [4/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R2: The target device must be DG2, PVC or newer GPU.

Definition at line 7236 of file memory.hpp.

◆ block_load() [5/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 6998 of file memory.hpp.

◆ block_load() [6/8]

template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<PassThruSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
simd_mask< 1 >  pred,
PassThruSimdViewT  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7197 of file memory.hpp.

◆ block_load() [7/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7072 of file memory.hpp.

◆ block_load() [8/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7150 of file memory.hpp.

◆ block_store() [1/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7574 of file memory.hpp.

◆ block_store() [2/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7650 of file memory.hpp.

◆ block_store() [3/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7546 of file memory.hpp.

◆ block_store() [4/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7612 of file memory.hpp.

◆ block_store() [5/8]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
ValuesSimdViewT  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, ValuesSimdViewT vals, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7684 of file memory.hpp.

◆ block_store() [6/8]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
ValuesSimdViewT  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7760 of file memory.hpp.

◆ block_store() [7/8]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
ValuesSimdViewT  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, ValuesSimdViewT vals, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7717 of file memory.hpp.

◆ block_store() [8/8]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename AccessorT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
ValuesSimdViewT  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7803 of file memory.hpp.

◆ slm_atomic_update() [1/13]

template<atomic_op Op, typename OffsetSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<OffsetSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8472 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [2/13]

template<atomic_op Op, typename OffsetSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<OffsetSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, simd_mask<N> mask = 1)

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8140 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ slm_atomic_update() [3/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
simd< T, N >  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8538 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [4/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8504 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [5/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<OffsetSimdViewT> && detail::is_simd_view_type_v<SrcSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, simd_mask<N> mask = 1)

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8172 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ slm_atomic_update() [6/13]

template<atomic_op Op, typename OffsetSimdViewT , typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT> && detail::is_simd_view_type_v<OffsetSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( OffsetSimdViewT  byte_offset,
SrcSimdViewT  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(OffsetSimdViewT byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8573 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [7/13]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

Two argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> mask = 1); // (lacc-au2-1) simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8341 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fcmpxchg, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [8/13]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

One argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1); /// (slm-au1-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1); // (lacc-au1-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1) /// (slm-au1-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8062 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fadd, sycl::_V1::ext::intel::esimd::fmax, sycl::_V1::ext::intel::esimd::fmin, sycl::_V1::ext::intel::esimd::fsub, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::store.

◆ slm_atomic_update() [9/13]

template<atomic_op Op, typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8411 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [10/13]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd_mask< N >  mask = 1 
)

Atomic update operation performed on SLM. No-argument variant of the atomic update operation. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1) The following functions do the same work as slm_atomic_update(). They accept a local accessor lacc and the atomic update is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<1> pred = 1); // (lacc-au0-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation - can be atomic_op::inc or atomic_op::dec, atomic_op::load.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 7983 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::load, and sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl().

◆ slm_atomic_update() [11/13]

template<atomic_op Op, typename SrcSimdViewT , typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd<T, N> src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8382 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [12/13]

template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && detail::is_simd_view_type_v<SrcSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, simd_mask<N> mask = 1)

Variation of the API that allows using simd_view without specifying T and N template parameters. Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8111 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ slm_atomic_update() [13/13]

template<atomic_op Op, typename SrcSimdViewT , typename T = SrcSimdViewT::value_type::element_type, int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && detail::is_simd_view_type_v<SrcSimdViewT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
SrcSimdViewT  src0,
SrcSimdViewT  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, SrcSimdViewT src0, SrcSimdViewT src1, simd_mask<N> mask = 1); Variation of the API that allows using simd_view without specifying T and N template parameters.

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 8441 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_block_load() [1/5]

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
Flags   
)

Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.

Template Parameters
TElement type.
NNumber of elements to load.
FlagsThe alignment specifier type tag.
Parameters
byte_offsetThe byte-offset to load from.
FlagsSpecifies the alignment.
Returns
A vector of loaded elements.

Definition at line 6685 of file memory.hpp.

◆ slm_block_load() [2/5]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
PropertyListT  props = {} 
)

Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. simd<T, N> slm_block_load(uint32_t byte_offset, props={}); // (slm-bl-1) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, props={}); // (slm-bl-2) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) The following functions do the same work as slm_block_load(). They accept a local accessor lacc and the load is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) simd<T, N> slm_block_load(uint32_t byte_offset, props = {}); // (slm-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect load results on Gen12.

Definition at line 6743 of file memory.hpp.

◆ slm_block_load() [3/5]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 6781 of file memory.hpp.

◆ slm_block_load() [4/5]

template<typename PassThruSimdViewT , typename T = PassThruSimdViewT::value_type::element_type, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<PassThruSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  offset,
simd_mask< 1 >  pred,
PassThruSimdViewT  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, PassThruSimdViewT pass_thru, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 6962 of file memory.hpp.

◆ slm_block_load() [5/5]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 6867 of file memory.hpp.

◆ slm_block_store() [1/5]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7442 of file memory.hpp.

◆ slm_block_store() [2/5]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props={});

The following functions do the same work as slm_block_store(). They accept a local accessor lacc and the store of vals is done to SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={});

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={});

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block stored operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7369 of file memory.hpp.

◆ slm_block_store() [3/5]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
ValuesSimdViewT  vals,
PropertyListT  props = {} 
)

void slm_block_store(uint32_t byte_offset, ValuesSimdViewT vals, props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 7518 of file memory.hpp.

◆ slm_block_store() [4/5]

template<typename ValuesSimdViewT , typename T = ValuesSimdViewT::value_type::element_type, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
ValuesSimdViewT  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void slm_block_store(uint32_t byte_offset, ValuesSimdViewT vals, simd_mask<1> pred, props={}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block stored operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 7488 of file memory.hpp.

◆ slm_block_store() [5/5]

template<typename T , int N, typename Flags >
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  offset,
simd< T, N >  vals,
Flags   
)

Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient scatter is generated. If the stored vector is too long for 1 flat-store GPU instruction, then a series of flat-store and/or scatters may be generated.

Template Parameters
TElement type.
NNumber of elements to store.
FlagsThe alignment specifier type tag.
Parameters
offsetThe byte-offset to store at.
valsThe vector to store.
FlagsSpecifies the alignment.

Definition at line 7304 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data().

◆ slm_gather() [1/11]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
PropertyListT  props = {} 
)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 6225 of file memory.hpp.

◆ slm_gather() [2/11]

template<int VS = 1, typename OffsetSimdViewT , typename PassThruSimdViewT , int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< (detail::is_simd_view_type_v<OffsetSimdViewT> && detail::is_simd_view_type_v<PassThruSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>), simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
simd_mask< N/VS >  mask,
PassThruSimdViewT  pass_thru,
PropertyListT  props = {} 
)

template <int VS = 1, typename OffsetSimdViewT, typename PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, PropertyListT props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 6116 of file memory.hpp.

◆ slm_gather() [3/11]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 6197 of file memory.hpp.

◆ slm_gather() [4/11]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
simd_mask< N/VS >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 6030 of file memory.hpp.

◆ slm_gather() [5/11]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 5992 of file memory.hpp.

◆ slm_gather() [6/11]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
simd_mask< N >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 5967 of file memory.hpp.

◆ slm_gather() [7/11]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
simd_mask< N >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 5937 of file memory.hpp.

◆ slm_gather() [8/11]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 5905 of file memory.hpp.

◆ slm_gather() [9/11]

template<int VS = 1, typename PassThruSimdViewT , int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< (detail::is_simd_view_type_v<PassThruSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>), simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
simd_mask< N/VS >  mask,
PassThruSimdViewT  pass_thru,
PropertyListT  props = {} 
)

template <int VS = 1, typename PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), typename T = PassThruSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, PropertyListT props = {}); Variation of the API that allows using simd_view without specifying T and N template parameters.

Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 6164 of file memory.hpp.

◆ slm_gather() [10/11]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 5846 of file memory.hpp.

◆ slm_gather() [11/11]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
simd_mask< N/VS >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3)

The next 3 functions are similar to the above and were added for convenience. They assume the VS parameter is set to 1 and do not require specifying the template parameters <T, N, VS> at function calls. template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6)

The next 3 functions are variations of the first 3 above (slm-ga-1,2,3) and were added only to support simd_view instead of simd for byte_offsets and/or pass_thru operands. template <typename T, int N, int VS = 1, typename OffsetSimdViewT typename PropertyListT = empty_props_t> simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru PropertyListT props = {}); // (slm-ga-7) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) Supported platforms: DG2, PVC only - Temporary restriction for the variant with pass_thru operand. Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 5778 of file memory.hpp.

◆ slm_gather_rgba()

template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
RGBAMaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
maskOperation mask. All-1 by default.
Returns
Gathered data as an N - element vector.

Definition at line 6638 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_init() [1/2]

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( )

Declare per-work-group slm size.

GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.

Template Parameters
SLMSizeShared Local Memory (SLM) size

Definition at line 5693 of file memory.hpp.

◆ slm_init() [2/2]

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( uint32_t  size)

Declare per-work-group slm size.

Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together

Parameters
sizeShared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel.

Definition at line 5705 of file memory.hpp.

◆ slm_scalar_load()

template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load ( uint32_t  offset)

Load a scalar value from the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
Returns
the loaded value

Definition at line 6234 of file memory.hpp.

◆ slm_scalar_store()

template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store ( uint32_t  offset,
val 
)

Store a scalar value into the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
valvalue to store

Definition at line 6621 of file memory.hpp.

◆ slm_scatter() [1/8]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

template <int VS, typename T, int N, typename OffsetSimdViewT, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {});

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional comspile-time properties. Only 'alignment' property is used.

Definition at line 6406 of file memory.hpp.

◆ slm_scatter() [2/8]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
simd< T, N >  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

template <int VS, typename T, int N, typename OffsetSimdViewT, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {});

Storage to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
valsThe vector of values to store.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6380 of file memory.hpp.

◆ slm_scatter() [3/8]

template<int VS = 1, typename ValuesSimdViewT , typename OffsetSimdViewT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
ValuesSimdViewT  vals,
PropertyListT  props = {} 
)

template <int VS = 1, typename ValuesSimdViewT, typename OffsetSimdViewT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, PropertyListT props = {});

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6541 of file memory.hpp.

◆ slm_scatter() [4/8]

template<int VS = 1, typename ValuesSimdViewT , typename OffsetSimdViewT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
ValuesSimdViewT  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <int VS = 1, typename ValuesSimdViewT, typename OffsetSimdViewT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(OffsetSimdViewT byte_offsets, ValuesSimdViewT vals, simd_mask<N / VS> mask, PropertyListT props = {});

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6504 of file memory.hpp.

◆ slm_scatter() [5/8]

template<int VS = 1, typename ValuesSimdViewT , typename OffsetT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< OffsetT, N/VS >  byte_offsets,
ValuesSimdViewT  vals,
PropertyListT  props = {} 
)

template <int VS = 1, typename ValuesSimdViewT, typename OffsetT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(simd<OffsetT, N / VS> byte_offsets, ValuesSimdViewT vals, PropertyListT props = {});

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6610 of file memory.hpp.

◆ slm_scatter() [6/8]

template<int VS = 1, typename ValuesSimdViewT , typename OffsetT , int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<ValuesSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< OffsetT, N/VS >  byte_offsets,
ValuesSimdViewT  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <int VS = 1, typename ValuesSimdViewT, typename OffsetT, int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(), typename T = ValuesSimdViewT::value_type::element_type, typename PropertyListT = empty_properties_t> void slm_scatter(simd<OffsetT, N / VS> byte_offsets, ValuesSimdViewT vals, simd_mask<N / VS> mask, PropertyListT props = {});

Variation of the API that allows using simd_view without specifying T and N template parameters. Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6578 of file memory.hpp.

◆ slm_scatter() [7/8]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N/VS >  byte_offsets,
simd< T, N >  vals,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6345 of file memory.hpp.

◆ slm_scatter() [8/8]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N/VS >  byte_offsets,
simd< T, N >  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2)

The next 2 functions are variations of the first 2 above (slm-sc-1,2) and were added only to support simd_view instead of simd for byte_offsets. template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. Storage of any element can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of writes per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
valsThe vector of values to store.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 6284 of file memory.hpp.

◆ slm_scatter_rgba()

template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba ( simd< uint32_t, N >  offsets,
simd< T, N *get_num_channels_enabled(Mask)>  vals,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
MaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
valsvalues to be written.
maskOperation mask. All-1 by default.

Definition at line 6656 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().