Collaboration diagram for Shared local memory access functions.:

Functions
template<uint32_t SLMSize>
__ESIMD_API void	sycl::_V1::ext::intel::esimd::slm_init ()
	Declare per-work-group slm size. More...

__ESIMD_API void	sycl::_V1::ext::intel::esimd::slm_init (uint32_t size)
	Declare per-work-group slm size. More...

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
	template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) More...

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
	template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, PropertyListT props={})
	template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, simd< T, N > pass_thru, PropertyListT props={})
	template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, PropertyListT props={})
	template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, PropertyListT props={})
	template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
	template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
	simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, PropertyListT props={})
	simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets `byte_offsets`, and returns the loaded elements. More...

template<typename T >
__ESIMD_API T	sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset)
	Load a scalar value from the Shared Local Memory. More...

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
	template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) More...

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, PropertyListT props={})
	template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets `byte_offsets`. More...

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
	template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets `byte_offsets`. More...

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, PropertyListT props={})
	void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets `byte_offsets`. More...

template<typename T >
__ESIMD_API void	sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val)
	Store a scalar value into the Shared Local Memory. More...

template<typename T , int N, rgba_channel_mask RGBAMask>
__ESIMD_API std::enable_if_t<(N==8\|\|N==16\|\|N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> >	sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1)
	Gather data from the Shared Local Memory at specified `offsets` and return it as simd vector. More...

template<typename T , int N, rgba_channel_mask Mask>
__ESIMD_API std::enable_if_t<(N==8\|\|N==16\|\|N==32) &&(sizeof(T)==4)>	sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
	Gather data from the Shared Local Memory at specified `offsets` and return it as simd vector. More...

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, Flags)
	Loads a contiguous block of SLM memory referenced by the given byte-offset `offset`, then returns the loaded data as a simd object. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, PropertyListT props={})
	Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the `byte_offset`. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
	simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given `byte_offset`. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
	simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor `lacc` at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor `lacc` at zero offset. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor `lacc` at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor `lacc` at zero offset. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor `lacc` at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
	simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor `lacc` at zero offset. More...

template<typename T , int N, typename Flags >
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > >	sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals, Flags)
	Stores elements of the vector `vals` to a contiguous block of SLM memory at the given byte-offset `offset`. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
	Each of the following slm_block_store functions stores the vector `vals` to a contiguous memory block in SLM (Shared Local Memory) at the `byte_offset`. More...

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
	void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector `vals` to a contiguous memory block in SLM (Shared Local Memory) at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, PropertyListT props={})
	void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector `vals` to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor `lacc` at the given `byte_offset`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, PropertyListT props={})
	void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector `vals` to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor `lacc`. More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
	void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={}); More...

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > >	sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
	void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector `vals` to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor `lacc`. More...

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
	More...

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
	simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates `N` memory locations in SLM ssociated with the local accessor `lacc` at the given `byte_offset`, and returns a vector of old values found at the memory locations before update. More...

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
	One argument variant of the atomic update operation. More...

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1)
	simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) More...

template<atomic_op Op, typename T , int N>
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, simd< T, N > >	sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
	Two argument variant of the atomic update operation. More...

template<atomic_op Op, typename T , int N, typename AccessorT >
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > >	sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1)
	simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) More...

Detailed Description

Function Documentation

◆ atomic_update() [1/3]

template<atomic_op Op, typename T , int N, typename AccessorT >

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update	(	AccessorT	lacc,
		simd< uint32_t, N >	byte_offset,
		simd< T, N >	src0,
		simd< T, N >	src1,
		simd_mask< N >	mask = `1`
	)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1)

Definition at line 6127 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ atomic_update() [2/3]

template<atomic_op Op, typename T , int N, typename AccessorT >

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update	(	AccessorT	lacc,
		simd< uint32_t, N >	byte_offset,
		simd< T, N >	src0,
		simd_mask< N >	mask = `1`
	)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1)

Atomically updates N memory locations in SLM indicated by local accessor lacc and a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters

Op	The atomic operation.
T	The vector element type.
N	The number of memory locations to update.

Parameters

byte_offset	The vector of 32-bit offsets.
src0	is the first atomic operand.
mask	Operation mask, only locations with non-zero in the corresponding mask element are updated.

Returns: A vector of the old values at the memory locations before the update.

Definition at line 6057 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::src0.

◆ atomic_update() [3/3]

template<atomic_op Op, typename T , int N, typename AccessorT >

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update	(	AccessorT	lacc,
		simd< uint32_t, N >	byte_offset,
		simd_mask< N >	mask = `1`
	)

simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset, and returns a vector of old values found at the memory locations before update.

Definition at line 5966 of file memory.hpp.

◆ block_load() [1/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 5293 of file memory.hpp.

◆ block_load() [2/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5371 of file memory.hpp.

◆ block_load() [3/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		simd_mask< 1 >	pred,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The local accessor lacc must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R2: The target device must be DG2, PVC or newer GPU.

Definition at line 5450 of file memory.hpp.

◆ block_load() [4/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		uint32_t	byte_offset,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props produces incorrect load results on Gen12.

Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.

Definition at line 5258 of file memory.hpp.

◆ block_load() [5/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		uint32_t	byte_offset,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5332 of file memory.hpp.

◆ block_load() [6/6]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load	(	AccessorT	lacc,
		uint32_t	byte_offset,
		simd_mask< 1 >	pred,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru value is returned.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The lacc + byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5410 of file memory.hpp.

◆ block_store() [1/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store	(	AccessorT	lacc,
		simd< T, N >	vals,
		PropertyListT	props = `{}`
	)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5673 of file memory.hpp.

◆ block_store() [2/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store	(	AccessorT	lacc,
		simd< T, N >	vals,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5749 of file memory.hpp.

◆ block_store() [3/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store	(	AccessorT	lacc,
		uint32_t	byte_offset,
		simd< T, N >	vals,
		PropertyListT	props = `{}`
	)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5645 of file memory.hpp.

◆ block_store() [4/4]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store	(	AccessorT	lacc,
		uint32_t	byte_offset,
		simd< T, N >	vals,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5711 of file memory.hpp.

◆ slm_atomic_update() [1/3]

template<atomic_op Op, typename T , int N>

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update	(	simd< uint32_t, N >	byte_offset,
		simd< T, N >	src0,
		simd< T, N >	src1,
		simd_mask< N >	mask = `1`
	)

Two argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters

Op	The atomic operation.
T	The vector element type.
N	The number of memory locations to update.

Parameters

byte_offset	The vector of 32-bit offsets.
src0	is the first atomic operand (new value).
src1	is the second atomic operand (expected value).
mask	Operation mask, only locations with non-zero in the corresponding mask element are updated.

Returns: A vector of the old values at the memory locations before the update.

Definition at line 6097 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fcmpxchg, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.

◆ slm_atomic_update() [2/3]

template<atomic_op Op, typename T , int N>

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update	(	simd< uint32_t, N >	byte_offset,
		simd< T, N >	src0,
		simd_mask< N >	mask = `1`
	)

One argument variant of the atomic update operation.

simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1); /// (slm-au1-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1) /// (slm-au1-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters

Op	The atomic operation.
T	The vector element type.
N	The number of memory locations to update.

Parameters

byte_offset	The vector of 32-bit offsets.
src0	is the first atomic operand.
mask	Operation mask, only locations with non-zero in the corresponding mask element are updated.

Returns: A vector of the old values at the memory locations before the update.

Definition at line 6008 of file memory.hpp.

◆ slm_atomic_update() [3/3]

template<atomic_op Op, typename T , int N>

__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update	(	simd< uint32_t, N >	byte_offset,
		simd_mask< N >	mask = `1`
	)

Atomic update operation performed on SLM. No-argument variant of the atomic update operation. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1) The following functions do the same work as slm_atomic_update(). They accept a local accessor lacc and the atomic update is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<1> pred = 1); // (lacc-au0-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1)

Atomically updates N memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.

Template Parameters

Op	The atomic operation - can be `atomic_op::inc` or `atomic_op::dec`, `atomic_op::load`.
T	The vector element type.
N	The number of memory locations to update.

Parameters

byte_offset	The vector of 32-bit offsets.
mask	Operation mask, only locations with non-zero in the corresponding mask element are updated.

Returns: A vector of the old values at the memory locations before the update.

Definition at line 5929 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::load, and sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl().

◆ slm_block_load() [1/4]

template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>

__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load	(	uint32_t	byte_offset,
		Flags
	)

Loads a contiguous block of SLM memory referenced by the given byte-offset offset, then returns the loaded data as a simd object.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.

Template Parameters

T	Element type.
N	Number of elements to load.
Flags	The alignment specifier type tag.

Parameters

byte_offset	The byte-offset to load from.
Flags	Specifies the alignment.

Returns: A vector of loaded elements.

Definition at line 4990 of file memory.hpp.

◆ slm_block_load() [2/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load	(	uint32_t	byte_offset,
		PropertyListT	props = `{}`
	)

Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. simd<T, N> slm_block_load(uint32_t byte_offset, props={}); // (slm-bl-1) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, props={}); // (slm-bl-2) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) The following functions do the same work as slm_block_load(). They accept a local accessor lacc and the load is done from SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) simd<T, N> slm_block_load(uint32_t byte_offset, props = {}); // (slm-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect load results on Gen12.

Definition at line 5048 of file memory.hpp.

◆ slm_block_load() [3/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load	(	uint32_t	byte_offset,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5086 of file memory.hpp.

◆ slm_block_load() [4/4]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load	(	uint32_t	offset,
		simd_mask< 1 >	pred,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset.

The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5172 of file memory.hpp.

◆ slm_block_store() [1/3]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store	(	uint32_t	byte_offset,
		simd< T, N >	vals,
		PropertyListT	props = `{}`
	)

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset not aligned by 16-bytes and not specifying the actual alignment in props produces incorrect store results on Gen12.

Definition at line 5611 of file memory.hpp.

◆ slm_block_store() [2/3]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store	(	uint32_t	byte_offset,
		simd< T, N >	vals,
		simd_mask< 1 >	pred,
		PropertyListT	props = `{}`
	)

Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props={});

The following functions do the same work as slm_block_store(). They accept a local accessor lacc and the store of vals is done to SLM associated with lacc plus byte_offset applied to it. If byte_offset is omitted, then zero offset is used. void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={});

void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});

void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={});

void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset. The parameter pred is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block stored operation is a NO-OP.

The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.

Alignment: If props does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).

Restrictions - predicate imposed - temporary: R1: The byte_offset must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 5538 of file memory.hpp.

◆ slm_block_store() [3/3]

template<typename T , int N, typename Flags >

__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags> > sycl::_V1::ext::intel::esimd::slm_block_store	(	uint32_t	offset,
		simd< T, N >	vals,
		Flags
	)

Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient scatter is generated. If the stored vector is too long for 1 flat-store GPU instruction, then a series of flat-store and/or scatters may be generated.

Template Parameters

T	Element type.
N	Number of elements to store.
Flags	The alignment specifier type tag.

Parameters

offset	The byte-offset to store at.
vals	The vector to store.
Flags	Specifies the alignment.

Definition at line 5473 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data().

◆ slm_gather() [1/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	OffsetSimdViewT	byte_offsets,
		PropertyListT	props = `{}`
	)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4734 of file memory.hpp.

◆ slm_gather() [2/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	OffsetSimdViewT	byte_offsets,
		simd_mask< N/VS >	mask,
		PropertyListT	props = `{}`
	)

simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
mask	The access mask, defaults to all 1s.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4706 of file memory.hpp.

◆ slm_gather() [3/9]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	OffsetSimdViewT	byte_offsets,
		simd_mask< N/VS >	mask,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
mask	The access mask, defaults to all 1s.
pass_thru	The vector pass through values.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4673 of file memory.hpp.

◆ slm_gather() [4/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N >	byte_offsets,
		PropertyListT	props = `{}`
	)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters

T	Element type.
N	Number of elements to read.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4635 of file memory.hpp.

◆ slm_gather() [5/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N >	byte_offsets,
		simd_mask< N >	mask,
		PropertyListT	props = `{}`
	)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters

T	Element type.
N	Number of elements to read.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
mask	The access mask, defaults to all 1s.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4610 of file memory.hpp.

◆ slm_gather() [6/9]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N >	byte_offsets,
		simd_mask< N >	mask,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters

T	Element type.
N	Number of elements to read.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
mask	The access mask, defaults to all 1s.
pass_thru	The vector pass through values.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4580 of file memory.hpp.

◆ slm_gather() [7/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N/VS >	byte_offsets,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4548 of file memory.hpp.

◆ slm_gather() [8/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N/VS >	byte_offsets,
		simd_mask< N/VS >	mask,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements.

Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
mask	The access mask, defaults to all 1s.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read. Elements in masked out lanes are undefined.

Definition at line 4489 of file memory.hpp.

◆ slm_gather() [9/9]

template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather	(	simd< uint32_t, N/VS >	byte_offsets,
		simd_mask< N/VS >	mask,
		simd< T, N >	pass_thru,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3)

The next 3 functions are similar to the above and were added for convenience. They assume the VS parameter is set to 1 and do not require specifying the template parameters <T, N, VS> at function calls. template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6)

The next 3 functions are variations of the first 3 above (slm-ga-1,2,3) and were added only to support simd_view instead of simd for byte_offsets and/or pass_thru operands. template <typename T, int N, int VS = 1, typename OffsetSimdViewT typename PropertyListT = empty_props_t> simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru PropertyListT props = {}); // (slm-ga-7) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) Supported platforms: DG2, PVC only - Temporary restriction for the variant with pass_thru operand. Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru operand is returned.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
mask	The access mask, defaults to all 1s.
pass_thru	The vector pass through values.
props	The optional compile-time properties. Only 'alignment' property is used.

Returns: A vector of elements read.

Definition at line 4421 of file memory.hpp.

◆ slm_gather_rgba()

template<typename T , int N, rgba_channel_mask RGBAMask>

__ESIMD_API std::enable_if_t<(N == 8 \|\| N == 16 \|\| N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba	(	simd< uint32_t, N >	offsets,
		simd_mask< N >	mask = `1`
	)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters

T	The element type of the returned vector.
N	The number of elements to access.
RGBAMask	Pixel's channel mask.

Parameters

offsets	Byte offsets within the SLM of each element.
mask	Operation mask. All-1 by default.

Returns: Gathered data as an N - element vector.

Definition at line 4943 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

◆ slm_init() [1/2]

template<uint32_t SLMSize>

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( )

Declare per-work-group slm size.

GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.

Template Parameters

SLMSize Shared Local Memory (SLM) size

Definition at line 4336 of file memory.hpp.

◆ slm_init() [2/2]

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init ( uint32_t size )

Declare per-work-group slm size.

Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together

Parameters

size	Shared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel.

Definition at line 4348 of file memory.hpp.

◆ slm_scalar_load()

template<typename T >

__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load ( uint32_t offset )

Load a scalar value from the Shared Local Memory.

Template Parameters

T	type of the value

Parameters

offset SLM offset in bytes

Returns: the loaded value

Definition at line 4743 of file memory.hpp.

◆ slm_scalar_store()

template<typename T >

__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store	(	uint32_t	offset,
		T	val
	)

Store a scalar value into the Shared Local Memory.

Template Parameters

T	type of the value

Parameters

offset	SLM offset in bytes
val	value to store

Definition at line 4926 of file memory.hpp.

◆ slm_scatter() [1/4]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter	(	OffsetSimdViewT	byte_offsets,
		simd< T, N >	vals,
		PropertyListT	props = `{}`
	)

void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
vals	The vector of values to store.
props	The optional compile-time properties. Only 'alignment' property is used.

Definition at line 4915 of file memory.hpp.

◆ slm_scatter() [2/4]

template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter	(	OffsetSimdViewT	byte_offsets,
		simd< T, N >	vals,
		simd_mask< N/VS >	mask,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Storage to any element's memory location can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
vals	The vector of values to store.
mask	The access mask, defaults to all 1s.
props	The optional compile-time properties. Only 'alignment' property is used.

Definition at line 4889 of file memory.hpp.

◆ slm_scatter() [3/4]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter	(	simd< uint32_t, N/VS >	byte_offsets,
		simd< T, N >	vals,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors..

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned.
vals	The vector of values to store.
props	The optional compile-time properties. Only 'alignment' property is used.

Definition at line 4854 of file memory.hpp.

◆ slm_scatter() [4/4]

template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>

__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter	(	simd< uint32_t, N/VS >	byte_offsets,
		simd< T, N >	vals,
		simd_mask< N/VS >	mask,
		PropertyListT	props = `{}`
	)

template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2)

The next 2 functions are variations of the first 2 above (slm-sc-1,2) and were added only to support simd_view instead of simd for byte_offsets. template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets. Storage of any element can be disabled via the input vector of predicates mask. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.

Template Parameters

T	Element type.
N	Number of elements to read.
VS	Vector size. It can also be read as the number of writes per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.

Parameters

byte_offsets	the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size.
vals	The vector of values to store.
mask	The access mask, defaults to all 1s.
props	The optional compile-time properties. Only 'alignment' property is used.

Definition at line 4793 of file memory.hpp.

◆ slm_scatter_rgba()

template<typename T , int N, rgba_channel_mask Mask>

__ESIMD_API std::enable_if_t<(N == 8 \|\| N == 16 \|\| N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba	(	simd< uint32_t, N >	offsets,
		simd< T, N *get_num_channels_enabled(Mask)>	vals,
		simd_mask< N >	mask = `1`
	)

Gather data from the Shared Local Memory at specified offsets and return it as simd vector.

See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.

Template Parameters

T	The element type of the returned vector.
N	The number of elements to access.
Mask	Pixel's channel mask.

Parameters

offsets	Byte offsets within the SLM of each element.
vals	values to be written.
mask	Operation mask. All-1 by default.

Definition at line 4961 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().

Functions

Detailed Description

Function Documentation

◆ atomic_update() [1/3]

◆ atomic_update() [2/3]

◆ atomic_update() [3/3]

◆ block_load() [1/6]

◆ block_load() [2/6]

◆ block_load() [3/6]

◆ block_load() [4/6]

◆ block_load() [5/6]

◆ block_load() [6/6]

◆ block_store() [1/4]

◆ block_store() [2/4]

◆ block_store() [3/4]

◆ block_store() [4/4]

◆ slm_atomic_update() [1/3]

◆ slm_atomic_update() [2/3]

◆ slm_atomic_update() [3/3]

◆ slm_block_load() [1/4]

◆ slm_block_load() [2/4]

◆ slm_block_load() [3/4]

◆ slm_block_load() [4/4]

◆ slm_block_store() [1/3]

◆ slm_block_store() [2/3]

◆ slm_block_store() [3/3]

◆ slm_gather() [1/9]

◆ slm_gather() [2/9]

◆ slm_gather() [3/9]

◆ slm_gather() [4/9]

◆ slm_gather() [5/9]

◆ slm_gather() [6/9]

◆ slm_gather() [7/9]

◆ slm_gather() [8/9]

◆ slm_gather() [9/9]

◆ slm_gather_rgba()

◆ slm_init() [1/2]

◆ slm_init() [2/2]

◆ slm_scalar_load()

◆ slm_scalar_store()

◆ slm_scatter() [1/4]

◆ slm_scatter() [2/4]

◆ slm_scatter() [3/4]

◆ slm_scatter() [4/4]

◆ slm_scatter_rgba()