Functions | |
template<uint32_t SLMSize> | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_init () |
Declare per-work-group slm size. More... | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_init (uint32_t size) |
Declare per-work-group slm size. More... | |
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={}) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) More... | |
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, PropertyListT props={}) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, int VS, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N/VS > byte_offsets, PropertyListT props={}) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, simd< T, N > pass_thru, PropertyListT props={}) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, simd_mask< N > mask, PropertyListT props={}) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (simd< uint32_t, N > byte_offsets, PropertyListT props={}) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={}) |
template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, simd_mask< N/VS > mask, PropertyListT props={}) |
simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_gather (OffsetSimdViewT byte_offsets, PropertyListT props={}) |
simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets , and returns the loaded elements. More... | |
template<typename T > | |
__ESIMD_API T | sycl::_V1::ext::intel::esimd::slm_scalar_load (uint32_t offset) |
Load a scalar value from the Shared Local Memory. More... | |
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={}) |
template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) More... | |
template<typename T , int N, int VS = 1, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_scatter (simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, PropertyListT props={}) |
template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets . More... | |
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={}) |
template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets . More... | |
template<typename T , int N, int VS = 1, typename OffsetSimdViewT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v< OffsetSimdViewT > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_scatter (OffsetSimdViewT byte_offsets, simd< T, N > vals, PropertyListT props={}) |
void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets . More... | |
template<typename T > | |
__ESIMD_API void | sycl::_V1::ext::intel::esimd::slm_scalar_store (uint32_t offset, T val) |
Store a scalar value into the Shared Local Memory. More... | |
template<typename T , int N, rgba_channel_mask RGBAMask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > | sycl::_V1::ext::intel::esimd::slm_gather_rgba (simd< uint32_t, N > offsets, simd_mask< N > mask=1) |
Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More... | |
template<typename T , int N, rgba_channel_mask Mask> | |
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> | sycl::_V1::ext::intel::esimd::slm_scatter_rgba (simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1) |
Gather data from the Shared Local Memory at specified offsets and return it as simd vector. More... | |
template<typename T , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>> | |
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, Flags) |
Loads a contiguous block of SLM memory referenced by the given byte-offset offset , then returns the loaded data as a simd object. More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, PropertyListT props={}) |
Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset . More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={}) |
simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset . More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_block_load (uint32_t offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={}) |
simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, uint32_t byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::block_load (AccessorT lacc, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={}) |
simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc at zero offset. More... | |
template<typename T , int N, typename Flags > | |
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > | sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t offset, simd< T, N > vals, Flags) |
Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offset . More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={}) |
Each of the following slm_block_store functions stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset . More... | |
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::slm_block_store (uint32_t byte_offset, simd< T, N > vals, PropertyListT props={}) |
void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, PropertyListT props={}) |
void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc at the given byte_offset . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, PropertyListT props={}) |
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc . More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, uint32_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={}) |
void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={}); More... | |
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t> | |
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > | sycl::_V1::ext::intel::esimd::block_store (AccessorT lacc, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={}) |
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc . More... | |
template<atomic_op Op, typename T , int N> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd_mask< N > mask=1) |
More... | |
template<atomic_op Op, typename T , int N, typename AccessorT > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N memory locations in SLM ssociated with the local accessor lacc at the given byte_offset , and returns a vector of old values found at the memory locations before update. More... | |
template<atomic_op Op, typename T , int N> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1) |
One argument variant of the atomic update operation. More... | |
template<atomic_op Op, typename T , int N, typename AccessorT > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==1 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd_mask< N > mask=1) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) More... | |
template<atomic_op Op, typename T , int N> | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2, simd< T, N > > | sycl::_V1::ext::intel::esimd::slm_atomic_update (simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1) |
Two argument variant of the atomic update operation. More... | |
template<atomic_op Op, typename T , int N, typename AccessorT > | |
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==2 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > | sycl::_V1::ext::intel::esimd::atomic_update (AccessorT lacc, simd< uint32_t, N > byte_offset, simd< T, N > src0, simd< T, N > src1, simd_mask< N > mask=1) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) More... | |
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update | ( | AccessorT | lacc, |
simd< uint32_t, N > | byte_offset, | ||
simd< T, N > | src0, | ||
simd< T, N > | src1, | ||
simd_mask< N > | mask = 1 |
||
) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1)
Definition at line 6127 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update | ( | AccessorT | lacc, |
simd< uint32_t, N > | byte_offset, | ||
simd< T, N > | src0, | ||
simd_mask< N > | mask = 1 |
||
) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1)
Atomically updates N
memory locations in SLM indicated by local accessor lacc
and a vector of offsets, and returns a vector of old values found at the memory locations before update.
Op | The atomic operation. |
T | The vector element type. |
N | The number of memory locations to update. |
byte_offset | The vector of 32-bit offsets. |
src0 | is the first atomic operand. |
mask | Operation mask, only locations with non-zero in the corresponding mask element are updated. |
Definition at line 6057 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::src0.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0 && __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>, simd<T, N> > sycl::_V1::ext::intel::esimd::atomic_update | ( | AccessorT | lacc, |
simd< uint32_t, N > | byte_offset, | ||
simd_mask< N > | mask = 1 |
||
) |
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); // (lacc-au0-1) Atomically updates N
memory locations in SLM ssociated with the local accessor lacc
at the given byte_offset
, and returns a vector of old values found at the memory locations before update.
Definition at line 5966 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc
at zero offset.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props
produces incorrect load results on Gen12.
Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.
Definition at line 5293 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc
at zero offset.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The local accessor lacc
must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5371 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
simd_mask< 1 > | pred, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc
at zero offset.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru
value is returned.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The local accessor lacc
must point to memory at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512. R2: The target device must be DG2, PVC or newer GPU.
Definition at line 5450 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
uint32_t | byte_offset, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) associated with the local accessor lacc
at the given byte_offset
.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing local accessor associated with SLM starting from offset that is NOT aligned by 16-bytes and NOT specifying the actual alignment in props
produces incorrect load results on Gen12.
Note: if two or more local accessors are used in the same kernel, then 16-byte alignment is guaranteed only for one of them. Other local accessors may or may not get 16-byte alignment. N-th local accessor's alignment depends on N-1 local accessor sizes, and their element-alignment/padding. Only element-alignment is guaranteed for them.
Definition at line 5258 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
uint32_t | byte_offset, | ||
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc
at the given byte_offset
.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and some undefined value is returned.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The lacc
+ byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5332 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load | ( | AccessorT | lacc, |
uint32_t | byte_offset, | ||
simd_mask< 1 > | pred, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) Loads a contiguous memory block from SLM (Shared Local Memory) associated the local accessor lacc
at the given byte_offset
.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP, and pass_thru
value is returned.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The lacc
+ byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5410 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store | ( | AccessorT | lacc, |
simd< T, N > | vals, | ||
PropertyListT | props = {} |
||
) |
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={}); Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc
.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset
not aligned by 16-bytes and not specifying the actual alignment in props
produces incorrect store results on Gen12.
Definition at line 5673 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store | ( | AccessorT | lacc, |
simd< T, N > | vals, | ||
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={}); Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc
.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5749 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store | ( | AccessorT | lacc, |
uint32_t | byte_offset, | ||
simd< T, N > | vals, | ||
PropertyListT | props = {} |
||
) |
void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={}); Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc
at the given byte_offset
.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset
not aligned by 16-bytes and not specifying the actual alignment in props
produces incorrect store results on Gen12.
Definition at line 5645 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_local_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store | ( | AccessorT | lacc, |
uint32_t | byte_offset, | ||
simd< T, N > | vals, | ||
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});
Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) associated with the local accessor lacc
at the given byte_offset
. The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5711 of file memory.hpp.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 2, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | byte_offset, |
simd< T, N > | src0, | ||
simd< T, N > | src1, | ||
simd_mask< N > | mask = 1 |
||
) |
Two argument variant of the atomic update operation.
simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<1> pred = 1); // (lacc-au2-1) simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask = 1); /// (slm-au2-1) Atomically updates N
memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.
Op | The atomic operation. |
T | The vector element type. |
N | The number of memory locations to update. |
byte_offset | The vector of 32-bit offsets. |
src0 | is the first atomic operand (new value). |
src1 | is the second atomic operand (expected value). |
mask | Operation mask, only locations with non-zero in the corresponding mask element are updated. |
Definition at line 6097 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fcmpxchg, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::src1.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 1, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | byte_offset, |
simd< T, N > | src0, | ||
simd_mask< N > | mask = 1 |
||
) |
One argument variant of the atomic update operation.
simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1); /// (slm-au1-1) simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<1> pred = 1); // (lacc-au1-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0, simd_mask<N> mask = 1) /// (slm-au1-1)
Atomically updates N
memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.
Op | The atomic operation. |
T | The vector element type. |
N | The number of memory locations to update. |
byte_offset | The vector of 32-bit offsets. |
src0 | is the first atomic operand. |
mask | Operation mask, only locations with non-zero in the corresponding mask element are updated. |
Definition at line 6008 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::fadd, sycl::_V1::ext::intel::esimd::fmax, sycl::_V1::ext::intel::esimd::fmin, sycl::_V1::ext::intel::esimd::fsub, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl(), sycl::_V1::ext::intel::esimd::src0, and sycl::_V1::ext::intel::esimd::store.
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>) == 0, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_atomic_update | ( | simd< uint32_t, N > | byte_offset, |
simd_mask< N > | mask = 1 |
||
) |
Atomic update operation performed on SLM. No-argument variant of the atomic update operation. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1) The following functions do the same work as slm_atomic_update(). They accept a local accessor lacc
and the atomic update is done from SLM associated with lacc
plus byte_offset
applied to it. If byte_offset
is omitted, then zero offset is used. simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<1> pred = 1); // (lacc-au0-1) Usage of cache hints or non-standard operation width N requires DG2 or PVC. simd<T, N> slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1); /// (slm-au0-1)
Atomically updates N
memory locations in SLM indicated by a vector of offsets, and returns a vector of old values found at the memory locations before update.
Op | The atomic operation - can be atomic_op::inc or atomic_op::dec , atomic_op::load . |
T | The vector element type. |
N | The number of memory locations to update. |
byte_offset | The vector of 32-bit offsets. |
mask | Operation mask, only locations with non-zero in the corresponding mask element are updated. |
Definition at line 5929 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), sycl::_V1::ext::intel::esimd::detail::default_size, sycl::_V1::ext::intel::esimd::get_surface_index(), sycl::_V1::ext::intel::esimd::detail::isPowerOf2(), sycl::_V1::ext::intel::esimd::load, and sycl::_V1::ext::intel::esimd::detail::slm_atomic_update_impl().
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load | ( | uint32_t | byte_offset, |
Flags | |||
) |
Loads a contiguous block of SLM memory referenced by the given byte-offset offset
, then returns the loaded data as a simd object.
The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.
T | Element type. |
N | Number of elements to load. |
Flags | The alignment specifier type tag. |
byte_offset | The byte-offset to load from. |
Flags | Specifies the alignment. |
Definition at line 4990 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load | ( | uint32_t | byte_offset, |
PropertyListT | props = {} |
||
) |
Each of the following slm_block_load functions loads a contiguous memory block from SLM (Shared Local Memory) and the byte_offset
.
The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. simd<T, N> slm_block_load(uint32_t byte_offset, props={}); // (slm-bl-1) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, props={}); // (slm-bl-2) simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) The following functions do the same work as slm_block_load(). They accept a local accessor lacc
and the load is done from SLM associated with lacc
plus byte_offset
applied to it. If byte_offset
is omitted, then zero offset is used. simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, props={}); // (lacc-bl-1) simd<T, N> block_load(local_accessor lacc, props={}); // (lacc-bl-2) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, props={}); // (lacc-bl-3) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, props={}); // (lacc-bl-4) simd<T, N> block_load(local_accessor lacc, uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-5) simd<T, N> block_load(local_accessor lacc, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (lacc-bl-6) simd<T, N> slm_block_load(uint32_t byte_offset, props = {}); // (slm-bl-1) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset
. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_load instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower GATHER instead of faster BLOCK_LOAD. !!! Passing byte_offset
not aligned by 16-bytes and not specifying the actual alignment in props
produces incorrect load results on Gen12.
Definition at line 5048 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load | ( | uint32_t | byte_offset, |
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<N> pred, props = {}); // (slm-bl-2) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset
.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5086 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_block_load | ( | uint32_t | offset, |
simd_mask< 1 > | pred, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
simd<T, N> slm_block_load(uint32_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (slm-bl-3) Loads a contiguous memory block from SLM (Shared Local Memory) at the given byte_offset
.
The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5172 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store | ( | uint32_t | byte_offset, |
simd< T, N > | vals, | ||
PropertyListT | props = {} |
||
) |
void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props = {}); Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset
.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is 16-bytes to generate block_store instruction on all known target devices (Gen12, DG2, PVC, etc). On Gen12 (opposing to DG2 and PVC) the alignment smaller than 8-bytes is valid, but requires JIT compiler generating a slower SCATTER instead of faster BLOCK_STORE. !!! Passing byte_offset
not aligned by 16-bytes and not specifying the actual alignment in props
produces incorrect store results on Gen12.
Definition at line 5611 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_block_store | ( | uint32_t | byte_offset, |
simd< T, N > | vals, | ||
simd_mask< 1 > | pred, | ||
PropertyListT | props = {} |
||
) |
Each of the following slm_block_store functions stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) at the byte_offset
.
The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored. void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-2) props={});
The following functions do the same work as slm_block_store(). They accept a local accessor lacc
and the store of vals
is done to SLM associated with lacc
plus byte_offset
applied to it. If byte_offset
is omitted, then zero offset is used. void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-1) simd<T, N> vals, props={});
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-2) props={});
void block_store(local_accessor lacc, uint32_t byte_offset, // (lacc-bs-3) simd<T, N> vals, simd_mask<1> pred, props={});
void block_store(local_accessor lacc, simd<T, N> vals, // (lacc-bs-4) simd_mask<1> pred, props={});
void slm_block_store(uint32_t byte_offset, simd<T, N> vals, // (slm-bs-1) simd_mask<1> pred, props={}); Stores the vector vals
to a contiguous memory block in SLM (Shared Local Memory) at the given byte_offset
. The parameter pred
is the one-element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block stored operation is a NO-OP.
The parameter 'props' specifies the optional compile-time properties list. Only esimd::alignment property is used. Other properties are ignored.
Alignment: If props
does not specify the 'alignment' property, then the default expected alignment is the minimally required (see (R1) below).
Restrictions - predicate imposed - temporary: R1: The byte_offset
must be at least 4-byte aligned for 4-byte or smaller elements and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.
Definition at line 5538 of file memory.hpp.
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags> > sycl::_V1::ext::intel::esimd::slm_block_store | ( | uint32_t | offset, |
simd< T, N > | vals, | ||
Flags | |||
) |
Stores elements of the vector vals
to a contiguous block of SLM memory at the given byte-offset offset
.
The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient scatter is generated. If the stored vector is too long for 1 flat-store GPU instruction, then a series of flat-store and/or scatters may be generated.
T | Element type. |
N | Number of elements to store. |
Flags | The alignment specifier type tag. |
offset | The byte-offset to store at. |
vals | The vector to store. |
Flags | Specifies the alignment. |
Definition at line 5473 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data().
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | OffsetSimdViewT | byte_offsets, |
PropertyListT | props = {} |
||
) |
simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4734 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | OffsetSimdViewT | byte_offsets, |
simd_mask< N/VS > | mask, | ||
PropertyListT | props = {} |
||
) |
simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4706 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | OffsetSimdViewT | byte_offsets, |
simd_mask< N/VS > | mask, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> simd <T, N> slm_gather( OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-7) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru
operand is returned.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size. |
mask | The access mask, defaults to all 1s. |
pass_thru | The vector pass through values. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4673 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N > | byte_offsets, |
PropertyListT | props = {} |
||
) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
T | Element type. |
N | Number of elements to read. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4635 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N > | byte_offsets, |
simd_mask< N > | mask, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.
T | Element type. |
N | Number of elements to read. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4610 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N > | byte_offsets, |
simd_mask< N > | mask, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru
operand is returned.
T | Element type. |
N | Number of elements to read. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size. |
mask | The access mask, defaults to all 1s. |
pass_thru | The vector pass through values. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4580 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N/VS > | byte_offsets, |
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4548 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N/VS > | byte_offsets, |
simd_mask< N/VS > | mask, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements.
Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element of the returned vector is undefined.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
mask | The access mask, defaults to all 1s. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4489 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::slm_gather | ( | simd< uint32_t, N/VS > | byte_offsets, |
simd_mask< N/VS > | mask, | ||
simd< T, N > | pass_thru, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-2) simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, PropertyListT props = {}); // (slm-ga-3)
The next 3 functions are similar to the above and were added for convenience. They assume the VS parameter is set to 1 and do not require specifying the template parameters <T, N, VS> at function calls. template <typename T, int N, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-4) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, simd_mask<N> mask, PropertyListT props = {});// (slm-ga-5) simd<T, N> slm_gather(simd<uint32_t, N> byte_offsets, PropertyListT props = {}); // (slm-ga-6)
The next 3 functions are variations of the first 3 above (slm-ga-1,2,3) and were added only to support simd_view instead of simd for byte_offsets and/or pass_thru operands. template <typename T, int N, int VS = 1, typename OffsetSimdViewT typename PropertyListT = empty_props_t> simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru PropertyListT props = {}); // (slm-ga-7) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) simd <T, N> slm_gather(OffsetSimdViewT byte_offsets, PropertyListT props = {}); // (slm-ga-9) template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T, N> slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}); // (slm-ga-1) Supported platforms: DG2, PVC only - Temporary restriction for the variant with pass_thru operand. Loads ("gathers") elements of the type 'T' from Shared Local Memory locations addressed by byte offsets byte_offsets
, and returns the loaded elements. Access to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the load from (byte_offsets[i]) is skipped and the corresponding i-th element from pass_thru
operand is returned.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size. |
mask | The access mask, defaults to all 1s. |
pass_thru | The vector pass through values. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4421 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4), simd<T, N * get_num_channels_enabled(RGBAMask)> > sycl::_V1::ext::intel::esimd::slm_gather_rgba | ( | simd< uint32_t, N > | offsets, |
simd_mask< N > | mask = 1 |
||
) |
Gather data from the Shared Local Memory at specified offsets
and return it as simd vector.
See usm_gather_rgba for information about the operation semantics and parameter restrictions/interdependencies.
T | The element type of the returned vector. |
N | The number of elements to access. |
RGBAMask | Pixel's channel mask. |
offsets | Byte offsets within the SLM of each element. |
mask | Operation mask. All-1 by default. |
N
- element vector. Definition at line 4943 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), simd_mask< _Tp, _Abi >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init | ( | ) |
Declare per-work-group slm size.
GPU RT/driver requires this function to be called in the beginning of the kernel using SLM. There must be only 1 call site of slm_init() per kernel. If slm_init is called from some function F called from the kernel, then inlining of F into the kernel must be managed/guaranteed. slm_init<SLMSize> can also be used together with slm_allocator() class. In such cases slm_allocator<AdditionalMem> allocates extra chunk of SLM memory and the final amount of allocated SLM may be bigger than what is requested by slm_init. See more details on slm_allocator class usage at it's declaration and ESIMD extension SPEC.
SLMSize | Shared Local Memory (SLM) size |
Definition at line 4336 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_init | ( | uint32_t | size | ) |
Declare per-work-group slm size.
Non-constant argument version to be used with specialization constants only. Same restrictions are applied to this function as to it's template variant slm_init<SLMSize>(). This version has an additional restriction - it cannot be used together
size | Shared Local Memory (SLM) size to be allocated for each work-group of ESIMD kernel. |
Definition at line 4348 of file memory.hpp.
__ESIMD_API T sycl::_V1::ext::intel::esimd::slm_scalar_load | ( | uint32_t | offset | ) |
Load a scalar value from the Shared Local Memory.
T | type of the value |
offset | SLM offset in bytes |
Definition at line 4743 of file memory.hpp.
__ESIMD_API void sycl::_V1::ext::intel::esimd::slm_scalar_store | ( | uint32_t | offset, |
T | val | ||
) |
Store a scalar value into the Shared Local Memory.
T | type of the value |
offset | SLM offset in bytes |
val | value to store |
Definition at line 4926 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter | ( | OffsetSimdViewT | byte_offsets, |
simd< T, N > | vals, | ||
PropertyListT | props = {} |
||
) |
void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets
.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
vals | The vector of values to store. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4915 of file memory.hpp.
__ESIMD_API std::enable_if_t< detail::is_simd_view_type_v<OffsetSimdViewT> && ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter | ( | OffsetSimdViewT | byte_offsets, |
simd< T, N > | vals, | ||
simd_mask< N/VS > | mask, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter( OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets
.
Storage to any element's memory location can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size. |
vals | The vector of values to store. |
mask | The access mask, defaults to all 1s. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4889 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter | ( | simd< uint32_t, N/VS > | byte_offsets, |
simd< T, N > | vals, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets
.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of reads per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors.. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. |
vals | The vector of values to store. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4854 of file memory.hpp.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::slm_scatter | ( | simd< uint32_t, N/VS > | byte_offsets, |
simd< T, N > | vals, | ||
simd_mask< N/VS > | mask, | ||
PropertyListT | props = {} |
||
) |
template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-2)
The next 2 functions are variations of the first 2 above (slm-sc-1,2) and were added only to support simd_view instead of simd for byte_offsets. template <typename T, int N, int VS = 1, typename OffsetSimdViewT, typename PropertyListT = empty_props_t> void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-3) void slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals, PropertyListT props = {}); // (slm-sc-4) template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatter(simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals, simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-sc-1) Stores ("scatters") elements of the type 'T' to Shared Local Memory locations addressed by byte offsets byte_offsets
. Storage of any element can be disabled via the input vector of predicates mask
. If mask[i] is unset, then the storage to (byte_offsets[i]) is skipped.
T | Element type. |
N | Number of elements to read. |
VS | Vector size. It can also be read as the number of writes per each address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported only on DG2 and PVC and only for 4- and 8-byte element vectors. |
byte_offsets | the vector of 32-bit offsets in bytes. For each i, (byte_offsets[i]) must be element size aligned. If the alignment property is not passed, then it is assumed that each accessed address is aligned by element-size. |
vals | The vector of values to store. |
mask | The access mask, defaults to all 1s. |
props | The optional compile-time properties. Only 'alignment' property is used. |
Definition at line 4793 of file memory.hpp.
__ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)> sycl::_V1::ext::intel::esimd::slm_scatter_rgba | ( | simd< uint32_t, N > | offsets, |
simd< T, N *get_num_channels_enabled(Mask)> | vals, | ||
simd_mask< N > | mask = 1 |
||
) |
Gather data from the Shared Local Memory at specified offsets
and return it as simd vector.
See usm_scatter_rgba for information about the operation semantics and parameter restrictions/interdependencies.
T | The element type of the returned vector. |
N | The number of elements to access. |
Mask | Pixel's channel mask. |
offsets | Byte offsets within the SLM of each element. |
vals | values to be written. |
mask | Operation mask. All-1 by default. |
Definition at line 4961 of file memory.hpp.
References sycl::_V1::ext::intel::esimd::detail::simd_obj_impl< RawTy, N, Derived, SFINAE >::data(), and sycl::_V1::ext::intel::esimd::get_surface_index().