DPC++ Runtime
Runtime libraries for oneAPI DPC++
Shared local memory access functions.
Collaboration diagram for Shared local memory access functions.:

Functions

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ()
 Declare per-work-group slm size. More...
 
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init (uint32_t size)
 Declare per-work-group slm size. More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, PropertyListT props={})
 template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, PropertyListT props={})
 template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
 simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, PropertyListT props={})
 simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. More...
 
template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset)
 Load a scalar value from the Shared Local Memory. More...
 
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) More...
 
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
 template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, PropertyListT props={})
 void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. More...
 
template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val)
 Store a scalar value into the Shared Local Memory. More...
 
template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
 Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More...
 
template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, Flags)
 Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, PropertyListT props={})
 Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More...
 
template<typename T , int N, typename Flags >
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals, Flags)
 Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
 void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, PropertyListT props={})
 void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={}); More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc. More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
  More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset, and returns a vector of old values found at the memory locations before update. More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 One argument variant of the atomic update operation. More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) More...
 
template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, simd< T, N > > sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 Two argument variant of the atomic update operation. More...
 
template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
 simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) More...
 

Detailed Description

Function Documentation

◆ atomic_update() [1/3]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1)

Definition at line 6127 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [2/3]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1)

Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 6057 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [3/3]

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update ( AccessorT  lacc,
simd< uint32_t, N >  byte_offset,
simd_mask< N >  mask = 1 
)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset, and returns a vector of old values found at the memory locations before update.

Definition at line 5966 of file memory.hpp.

◆ block_load() [1/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 5293 of file memory.hpp.

◆ block_load() [2/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5371 of file memory.hpp.

◆ block_load() [3/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R2: The target device must be DG2, PVC or newer GPU.

Definition at line 5450 of file memory.hpp.

◆ block_load() [4/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 5258 of file memory.hpp.

◆ block_load() [5/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5332 of file memory.hpp.

◆ block_load() [6/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  lacc,
uint32_t  byte_offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5410 of file memory.hpp.

◆ block_store() [1/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5673 of file memory.hpp.

◆ block_store() [2/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5749 of file memory.hpp.

◆ block_store() [3/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5645 of file memory.hpp.

◆ block_store() [4/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  lacc,
uint32_t  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5711 of file memory.hpp.

◆ slm_atomic_update() [1/3]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd< T, N >  src1,
simd_mask< N >  mask = 1 
)

Two argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand (new value).
src1is the second atomic operand (expected value).
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 6097 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fcmpxchg, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [2/3]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd< T, N >  src0,
simd_mask< N >  mask = 1 
)

One argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1); /// (slm-au1-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1) /// (slm-au1-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
src0is the first atomic operand.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 6008 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fadd, sycl::_V1::ext::intel::esimd::fmax, sycl::_V1::ext::intel::esimd::fmin, sycl::_V1::ext::intel::esimd::fsub, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::store.

◆ slm_atomic_update() [3/3]

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update ( simd< uint32_t, N >  byte_offset,
simd_mask< N >  mask = 1 
)

Atomic update operation performed on SLM. No-argument variant of the atomic update operation. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1) The following functions do the same work as slm_atomic_update(). They accept a local accessor lacc and the atomic update is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<1> pred = 1); // (lacc-au0-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters
OpThe atomic operation - can be atomic_op::inc or atomic_op::dec, atomic_op::load.
TThe vector element type.
NThe number of memory locations to update.
Parameters
byte_offsetThe vector of 32-bit offsets.
maskOperation mask, only locations with non-zero in the corresponding mask element are updated.
Returns
A vector of the old values at the memory locations before the update.

Definition at line 5929 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::load, and sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl().

◆ slm_block_load() [1/4]

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
Flags   
)

Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.

Template Parameters
TElement type.
NNumber of elements to load.
FlagsThe alignment specifier type tag.
Parameters
byte_offsetThe byte-offset to load from.
FlagsSpecifies the alignment.
Returns
A vector of loaded elements.

Definition at line 4990 of file memory.hpp.

◆ slm_block_load() [2/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
PropertyListT  props = {} 
)

Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. simd<T, N> slm_block_load(uint32_t byte_offset, props={}); // (slm-bl-1) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, props={}); // (slm-bl-2) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) The following functions do the same work as slm_block_load(). They accept a local accessor lacc and the load is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) simd<T, N> slm_block_load(uint32_t byte_offset, props = {}); // (slm-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect load results on Gen12.

Definition at line 5048 of file memory.hpp.

◆ slm_block_load() [3/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5086 of file memory.hpp.

◆ slm_block_load() [4/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load ( uint32_t  offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5172 of file memory.hpp.

◆ slm_block_store() [1/3]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5611 of file memory.hpp.

◆ slm_block_store() [2/3]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props={});

The following functions do the same work as slm_block_store(). They accept a local accessor lacc and the store of vals is done to SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={});

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={});

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block stored operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5538 of file memory.hpp.

◆ slm_block_store() [3/3]

template<typename T , int N, typename Flags >
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags> > sycl::_V1::ext::intel::esimd::slm_block_store ( uint32_t  offset,
simd< T, N >  vals,
Flags   
)

Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient scatter is generated. If the stored vector is too long for 1 flat-store GPU instruction, then a series of flat-store and/or scatters may be generated.

Template Parameters
TElement type.
NNumber of elements to store.
FlagsThe alignment specifier type tag.
Parameters
offsetThe byte-offset to store at.
valsThe vector to store.
FlagsSpecifies the alignment.

Definition at line 5473 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data().

◆ slm_gather() [1/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
PropertyListT  props = {} 
)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4734 of file memory.hpp.

◆ slm_gather() [2/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4706 of file memory.hpp.

◆ slm_gather() [3/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( OffsetSimdViewT  byte_offsets,
simd_mask< N/VS >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4673 of file memory.hpp.

◆ slm_gather() [4/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4635 of file memory.hpp.

◆ slm_gather() [5/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
simd_mask< N >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4610 of file memory.hpp.

◆ slm_gather() [6/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N >  byte_offsets,
simd_mask< N >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4580 of file memory.hpp.

◆ slm_gather() [7/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4548 of file memory.hpp.

◆ slm_gather() [8/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4489 of file memory.hpp.

◆ slm_gather() [9/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather ( simd< uint32_t, N/VS >  byte_offsets,
simd_mask< N/VS >  mask,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3)

The next 3 functions are similar to the above and were added for convenience. They assume the VS parameter is set to 1 and do not require specifying the template parameters <T, N, VS> at function calls. template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6)

The next 3 functions are variations of the first 3 above (slm-ga-1,2,3) and were added only to support simd_view instead of simd for byte_offsets and/or pass_thru operands. template <typename T, int N, int VS = 1, typename OffsetSimdViewT typename PropertyListT = empty_props_t> simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru PropertyListT props = {}); // (slm-ga-7) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) Supported platforms: DG2, PVC only - Temporary restriction for the variant with pass_thru operand. Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
maskThe access mask, defaults to all 1s.
pass_thruThe vector pass through values.
propsThe optional compile-time properties. Only 'alignment' property is used.
Returns
A vector of elements read.

Definition at line 4421 of file memory.hpp.

◆ slm_gather_rgba()

template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba ( simd< uint32_t, N >  offsets,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
RGBAMaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
maskOperation mask. All-1 by default.
Returns
Gathered data as an N - element vector.

Definition at line 4943 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_init() [1/2]

template<uint32_t SLMSize>
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( )

Declare per-work-group slm size.

GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.

Template Parameters
SLMSizeShared Local Memory (SLM) size

Definition at line 4336 of file memory.hpp.

◆ slm_init() [2/2]

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( uint32_t  size)

Declare per-work-group slm size.

Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together

Parameters
sizeShared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel.

Definition at line 4348 of file memory.hpp.

◆ slm_scalar_load()

template<typename T >
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load ( uint32_t  offset)

Load a scalar value from the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
Returns
the loaded value

Definition at line 4743 of file memory.hpp.

◆ slm_scalar_store()

template<typename T >
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store ( uint32_t  offset,
val 
)

Store a scalar value into the Shared Local Memory.

Template Parameters
Ttype of the value
Parameters
offsetSLM offset in bytes
valvalue to store

Definition at line 4926 of file memory.hpp.

◆ slm_scatter() [1/4]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 4915 of file memory.hpp.

◆ slm_scatter() [2/4]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( OffsetSimdViewT  byte_offsets,
simd< T, N >  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Storage to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
valsThe vector of values to store.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 4889 of file memory.hpp.

◆ slm_scatter() [3/4]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N/VS >  byte_offsets,
simd< T, N >  vals,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
valsThe vector of values to store.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 4854 of file memory.hpp.

◆ slm_scatter() [4/4]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter ( simd< uint32_t, N/VS >  byte_offsets,
simd< T, N >  vals,
simd_mask< N/VS >  mask,
PropertyListT  props = {} 
)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2)

The next 2 functions are variations of the first 2 above (slm-sc-1,2) and were added only to support simd_view instead of simd for byte_offsets. template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. Storage of any element can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters
TElement type.
NNumber of elements to read.
VSVector size. It can also be read as the number of writes per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.
Parameters
byte_offsetsthe vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
valsThe vector of values to store.
maskThe access mask, defaults to all 1s.
propsThe optional compile-time properties. Only 'alignment' property is used.

Definition at line 4793 of file memory.hpp.

◆ slm_scatter_rgba()

template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba ( simd< uint32_t, N >  offsets,
simd< T, N *get_num_channels_enabled(Mask)>  vals,
simd_mask< N >  mask = 1 
)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters
TThe element type of the returned vector.
NThe number of elements to access.
MaskPixel's channel mask.
Parameters
offsetsByte offsets within the SLM of each element.
valsvalues to be written.
maskOperation mask. All-1 by default.

Definition at line 4961 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().