DPC++ Runtime
Runtime libraries for oneAPI DPC++
Block load/prefetch/store functions.
Collaboration diagram for Block load/prefetch/store functions.:

Functions

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, PropertyListT props={})
 Each of the following block load functions loads a contiguous memory block from the address referenced by the USM pointer 'ptr', or from 'ptr + offset', where 'offset' is the offset in bytes (not in elements!). More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, size_t byte_offset, PropertyListT props={})
 simd<T, N> block_load(const T* ptr, size_t byte_offset, props={}); // (usm-bl-2) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(const T* ptr, simd_mask<1> pred, props={}); // (usm-bl-3) This function loads a contiguous memory block from USM pointer ptr. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, size_t byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, props={}); // (usm-bl-4) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(const T* ptr, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-5) This function loads a contiguous memory block from USM pointer ptr. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (const T *ptr, size_t byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT props={})
 simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-6) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset. More...
 
template<typename Tx , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< Tx, N > > sycl::_V1::ext::intel::esimd::block_load (const Tx *addr, Flags)
 Loads a contiguous block of memory from the given memory address addr and returns the loaded data as a vector. More...
 
template<typename Tx , int N, typename AccessorTy , typename Flags = vector_aligned_tag, typename = std::enable_if_t< is_simd_flag_type_v<Flags> && detail::is_device_accessor_with_v< AccessorTy, detail::accessor_mode_cap::can_read>>, class T = detail::__raw_t<Tx>>
__ESIMD_API simd< Tx, N > sycl::_V1::ext::intel::esimd::block_load (AccessorTy acc, detail::DeviceAccessorOffsetT byte_offset, Flags flags)
 Loads a contiguous block of memory from the given accessor acc and byte_offset and returns the loaded data as a vector. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, PropertyListT props={})
 Each of the following block load functions loads a contiguous memory block from the address referenced by accessor 'acc', or from 'acc + byte_offset', The parameter 'pred' is the one element predicate. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, PropertyListT={})
 simd<T, N> block_load(AccessorT acc, props = {}); // (acc-bl-2) This function loads a contiguous memory block referenced by accessor acc and implied offset=0. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT={})
 simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-3) This function loads a contiguous memory block referenced by accessor acc and the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, simd_mask< 1 > pred, PropertyListT props={})
 simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, props = {}); // (acc-bl-4) This function loads a contiguous memory block referenced by accessor acc and the given byte_offset. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, simd_mask< 1 > pred, simd< T, N > pass_thru, PropertyListT={})
 simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-5) This function loads a contiguous memory block referenced by accessor acc and implied offset=0. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read >, simd< T, N > > sycl::_V1::ext::intel::esimd::block_load (AccessorT acc, simd_mask< 1 > pred, PropertyListT={})
 simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, props = {}); // (acc-bl-6) This function loads a contiguous memory block referenced by accessor acc and implied offset=0. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (T *ptr, simd< T, N > vals, PropertyListT={})
 Each of the following block store functions stores a contiguous memory block to the address referenced by the USM pointer 'ptr', or from 'ptr + offset', where 'offset' is the offset in bytes (not in elements!) with data specified by 'vals'. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (T *ptr, size_t byte_offset, simd< T, N > vals, PropertyListT props={})
 void block_store(T* ptr, size_t byte_offset, // (usm-bs-2) simd<T, N> vals, props={}); This function stores a contiguous memory block to USM pointer ptr and byte-offset byte_offset with data specified by vals. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< detail::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (T *ptr, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT={})
 void block_store(T* ptr, simd<T, N> vals, // (usm-bs-3) simd_mask<1> pred, props={}); This function stores a contiguous memory block to USM pointer ptr with data specified by vals. More...
 
template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > sycl::_V1::ext::intel::esimd::block_store (T *ptr, size_t byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(T* ptr, size_t byte_offset, // (usm-bs-4) simd<T, N> vals, simd_mask<1> pred, props={}); This function stores a contiguous memory block to USM pointer ptr and byte-offset byte_offset with data specified by vals. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::esimd::block_store (AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, simd< T, N > vals, PropertyListT props={})
 Each of the following block_store functions stores the vector 'vals' to a contiguous memory block at the address referenced by accessor 'acc', or from 'acc + byte_offset', The parameter 'pred' is the one element predicate. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::esimd::block_store (AccessorT acc, simd< T, N > vals, PropertyListT props={})
 void block_store(AccessorT acc, simd<T, N> vals, props = {}); // (acc-bs-2) This function stores a contiguous memory block to accessor acc with data specified by vals and implied offset=0. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::esimd::block_store (AccessorT acc, detail::DeviceAccessorOffsetT byte_offset, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(AccessorT acc, OffsetT byte_offset, // (acc-bs-3) simd<T, N> vals, simd_mask<1> pred, props = {}); This function stores a contiguous memory block to accessor acc and byte_offset with data specified by vals. More...
 
template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > &&detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_write > > sycl::_V1::ext::intel::esimd::block_store (AccessorT acc, simd< T, N > vals, simd_mask< 1 > pred, PropertyListT props={})
 void block_store(AccessorT acc, simd<T, N> vals, // (acc-bs-4) simd_mask<1> pred, props = {}); This function stores a contiguous memory block to accessor acc with data specified by vals and implied offset=0. More...
 

Detailed Description

Function Documentation

◆ block_load() [1/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
detail::DeviceAccessorOffsetT  byte_offset,
PropertyListT  props = {} 
)

Each of the following block load functions loads a contiguous memory block from the address referenced by accessor 'acc', or from 'acc + byte_offset', The parameter 'pred' is the one element predicate.

If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::cache_hint_L3, esimd::alignment. simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, props = {}); // (acc-bl-1) simd<T, N> block_load(AccessorT acc, props = {}); // (acc-bl-2) simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-3) simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, props = {}); // (acc-bl-4) simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-5) simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, props = {}); // (acc-bl-6) simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, props = {}); // (acc-bl-1) This function loads a contiguous memory block referenced by accessor acc and byte_offset.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the byte_offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. The alignment requirement may be less strict if stateless memory mode is ON, see block_load(usm_ptr, props) (aka usm-bl-01) for details/requirements.

Restrictions: there may be some extra restrictions depending on a) stateless memory mode enforcement is ON, b) cache hints are used, c) number of bytes loaded is either 16,32,64, or 128. If (b) || !(c), then the target device must be DG2 or PVC (not Gen12). If (a) && !(b), then there is no restriction on the number of elements to be loaded and byte_offset must be only element-aligned.

Gen12 requirements: !(b) && (c). It can load 16-, 32-, 64-, or 128-bytes only. DG2/PVC requirements: It can load such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more).

Definition at line 1748 of file memory.hpp.

◆ block_load() [2/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
detail::DeviceAccessorOffsetT  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, props = {}); // (acc-bl-4) This function loads a contiguous memory block referenced by accessor acc and the given byte_offset.

If the predicate pred is set to 0, then the load is omitted and the returned value is undefined.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements.

Restrictions - cache hint and predicate imposed - temporary: R1: byte_offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1914 of file memory.hpp.

◆ block_load() [3/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
detail::DeviceAccessorOffsetT  byte_offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  = {} 
)

simd<T, N> block_load(AccessorT acc, OffsetT byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-3) This function loads a contiguous memory block referenced by accessor acc and the given byte_offset.

If the predicate pred is set to 0, then the load is omitted and the pass_thru value is returned.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the byte_offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements.

Restrictions - cache hint and predicate imposed - temporary: R1: byte_offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1863 of file memory.hpp.

◆ block_load() [4/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
PropertyListT  = {} 
)

simd<T, N> block_load(AccessorT acc, props = {}); // (acc-bl-2) This function loads a contiguous memory block referenced by accessor acc and implied offset=0.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2. Other properties are ignored. If props specifies the alignment property, then it is ignored because this variant implies zero offset, which means the most favourable 16-byte alignment is used.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Restrictions: there may be some extra restrictions depending on a) stateless memory mode enforcement is ON, b) cache hints are used, c) number of bytes loaded is either 16,32,64, or 128. If (b) || !(c), then the target device must be DG2 or PVC (not Gen12). If (a) && !(b), then there is no restriction on the number of elements to be loaded and byte_offset must be only element-aligned.

Gen12 requirements: !(b) && (c). It can load 16-, 32-, 64-, or 128-bytes only. DG2/PVC requirements: It can load such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512.

Definition at line 1816 of file memory.hpp.

◆ block_load() [5/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
simd_mask< 1 >  pred,
PropertyListT  = {} 
)

simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, props = {}); // (acc-bl-6) This function loads a contiguous memory block referenced by accessor acc and implied offset=0.

If the predicate pred is set to 0, then the load is omitted and some undefined value is returned.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2. Other properties are ignored. If props specifies the alignment property, then it is ignored because this variant implies zero offset, which means the most favourable 16-byte alignment is used.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Restrictions - cache hint and predicate imposed - temporary: R1: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R2: The target device must be DG2, PVC or newer GPU.

Definition at line 1998 of file memory.hpp.

◆ block_load() [6/14]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_read>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( AccessorT  acc,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  = {} 
)

simd<T, N> block_load(AccessorT acc, simd_mask<1> pred, simd<T, N> pass_thru, props = {}); // (acc-bl-5) This function loads a contiguous memory block referenced by accessor acc and implied offset=0.

If the predicate pred is set to 0, then the load is omitted and the pass_thru value is returned.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2. Other properties are ignored. If props specifies the alignment property, then it is ignored because this variant implies zero offset, which means the most favourable 16-byte alignment is used.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Restrictions - cache hint and predicate imposed - temporary: R1: The number of elements must be: for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more). R2: The target device must be DG2, PVC or newer GPU.

Definition at line 1955 of file memory.hpp.

◆ block_load() [7/14]

template<typename Tx , int N, typename AccessorTy , typename Flags = vector_aligned_tag, typename = std::enable_if_t< is_simd_flag_type_v<Flags> && detail::is_device_accessor_with_v< AccessorTy, detail::accessor_mode_cap::can_read>>, class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> sycl::_V1::ext::intel::esimd::block_load ( AccessorTy  acc,
detail::DeviceAccessorOffsetT  byte_offset,
Flags  flags 
)

Loads a contiguous block of memory from the given accessor acc and byte_offset and returns the loaded data as a vector.

Actual code generated depends on the alignment parameter.

Template Parameters
TxElement type.
NNumber of elements to load, N * sizeof(Tx) must be 1, 2, 4 or 8 owords long.
AccessorTyAccessor type (auto-deduced).
FlagsThe alignment specifier type tag. Auto-deduced from the Flags parameter. If it is less than 16, then slower unaligned access is generated, otherwise the access is aligned.
Parameters
accThe accessor.
byte_offsetThe offset to load from in bytes.
FlagsSpecifies the alignment.
Returns
A vector of loaded elements.

Definition at line 1646 of file memory.hpp.

References sycl::_V1::ext::intel::esimd::alignment, and sycl::_V1::ext::intel::esimd::detail::isPowerOf2().

◆ block_load() [8/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
PropertyListT  props = {} 
)

Each of the following block load functions loads a contiguous memory block from the address referenced by the USM pointer 'ptr', or from 'ptr + offset', where 'offset' is the offset in bytes (not in elements!).

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are loaded. Otherwise, the block load operation is a NO-OP. The parameter 'pass_thru' specifies the values being copied to the returned result if 'pred' is set to 0. The parameter 'props' specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::cache_hint_L3, esimd::alignment. simd<T, N> block_load(const T* ptr, props={}); // (usm-bl-1) simd<T, N> block_load(const T* ptr, size_t byte_offset, props={}); // (usm-bl-2) simd<T, N> block_load(const T* ptr, simd_mask<1> pred, props={}); // (usm-bl-3) simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, props={}); // (usm-bl-4) simd<T, N> block_load(const T* ptr, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-5) simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-6) simd<T, N> block_load(const T* ptr, props={}); // (usm-bl-1) This function loads a contiguous memory block from USM pointer ptr.

There may be temporary restrictions depending on L1, L2 cache hints, See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is 4-bytes for 4-byte or smaller elements and 8-bytes for 8-byte elements. The address may be element-size aligned even for byte- and word-elements, but in such case the smaller alignment property must explicitly passed to this function. Extra restrictions may be in place - see Restrictions/R1 below.

Restrictions - cache hint imposed - temporary: If L1 or L2 cache hint is passed, then: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1356 of file memory.hpp.

◆ block_load() [9/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(const T* ptr, simd_mask<1> pred, props={}); // (usm-bl-3) This function loads a contiguous memory block from USM pointer ptr.

If the predicate pred is set to 0, then the load is omitted and the returned value is undefined.

This function has temporary restrictions. See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is the minimally required element-size alignment. Note that additional/temporary restrictions are applied (see Restrictions below).

Restrictions - cache hint and mask imposed - temporary: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1454 of file memory.hpp.

◆ block_load() [10/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(const T* ptr, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-5) This function loads a contiguous memory block from USM pointer ptr.

If the predicate pred is set to 0, then the load is omitted and the vector pass_thru is returned.

This function has temporary restrictions. See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is the minimally required element-size alignment. Note that additional/temporary restrictions are applied (see Restrictions below).

Restrictions - cache hint and mask imposed - temporary: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1544 of file memory.hpp.

◆ block_load() [11/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
size_t  byte_offset,
PropertyListT  props = {} 
)

simd<T, N> block_load(const T* ptr, size_t byte_offset, props={}); // (usm-bl-2) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset.

There may be temporary restrictions depending on L1, L2 cache hints, See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is 4-bytes for 4-byte or smaller elements and 8-bytes for 8-byte elements. The address may be element-size aligned even for byte- and word-elements, but in such case the smaller alignment property must explicitly passed to this function. Extra restrictions may be in place - see Restrictions/R1 below.

Restrictions - cache hint imposed - temporary: If L1 or L2 cache hint is passed, then: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1411 of file memory.hpp.

◆ block_load() [12/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
size_t  byte_offset,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, props={}); // (usm-bl-4) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset.

If the predicate pred is set to 0, then the load is omitted and the returned value is undefined.

This function has temporary restrictions. See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is the minimally required element-size alignment. Note that additional/temporary restrictions are applied (see Restrictions below).

Restrictions - cache hint and mask imposed - temporary: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1500 of file memory.hpp.

◆ block_load() [13/14]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT>, simd<T, N> > sycl::_V1::ext::intel::esimd::block_load ( const T *  ptr,
size_t  byte_offset,
simd_mask< 1 >  pred,
simd< T, N >  pass_thru,
PropertyListT  props = {} 
)

simd<T, N> block_load(const T* ptr, size_t byte_offset, simd_mask<1> pred, simd<T, N> pass_thru, props={}); // (usm-bl-6) This function loads a contiguous memory block from address referenced by USM pointer ptr and the given byte_offset.

If the predicate pred is set to 0, then the load is omitted and the vector pass_thru is returned.

This function has temporary restrictions. See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is the minimally required element-size alignment. Note that additional/temporary restrictions are applied (see Restrictions below).

Restrictions - cache hint and mask imposed - temporary: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 1591 of file memory.hpp.

◆ block_load() [14/14]

template<typename Tx , int N, typename Flags = overaligned_tag<detail::OperandSize::OWORD>>
__ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>, simd<Tx, N> > sycl::_V1::ext::intel::esimd::block_load ( const Tx *  addr,
Flags   
)

Loads a contiguous block of memory from the given memory address addr and returns the loaded data as a vector.

The generated code depends on the combination {T, N, Flags}. Providing flags specifying the alignment of 16-bytes or more produces more efficient code. If the alignment is smaller than 16-bytes, then less efficient gather is generated. If the loaded vector is too long for 1 flat-load GPU instruction, then a series of flat-loads and/or gathers may be generated.

Template Parameters
TxElement type.
NNumber of elements to load.
FlagsThe alignment specifier type tag.
Parameters
addrThe address to load from.
FlagsSpecifies the alignment.
Returns
A vector of loaded elements.

Definition at line 1616 of file memory.hpp.

◆ block_store() [1/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  acc,
detail::DeviceAccessorOffsetT  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

Each of the following block_store functions stores the vector 'vals' to a contiguous memory block at the address referenced by accessor 'acc', or from 'acc + byte_offset', The parameter 'pred' is the one element predicate.

If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::cache_hint_L3, esimd::alignment. void block_store(AccessorT acc, OffsetT byte_offset, // (acc-bs-1) simd<T, N> vals, props = {}); void block_store(AccessorT acc, simd<T, N> vals, props = {}); // (acc-bs-2) void block_store(AccessorT acc, OffsetT byte_offset, // (acc-bs-3) simd<T, N> vals, simd_mask<1> pred, props = {}); void block_store(AccessorT acc, simd<T, N> vals, // (acc-bs-4) simd_mask<1> pred, props = {}); void block_store(AccessorT acc, OffsetT byte_offset, // (acc-bs-1) simd<T, N> vals, props = {}); This function stores a contiguous memory block to accessor acc and byte_offset with data specified by vals.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the byte_offset must be at least 16-byte aligned if (!(b) && (c)) from the below restrictions, and must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements otherwise. If the 'alignment' property is specified as less than 16 bytes, then the target device must be DG2 or PVC (not Gen12). The alignment requirement may be less strict if stateless memory mode is ON, see block_store(usm_ptr, props) (aka usm-bs-01) for details/requirements.

Restrictions: there may be some extra restrictions depending on a) stateless memory mode enforcement is ON, b) cache hints are used, c) number of bytes stored is either 16,32,64, or 128. d) the 'alignment' property is specified as less than 16 bytes.

If (b) || !(c) || (d), then the target device must be DG2 or PVC (not Gen12). If (a) && !(b), then there is no restriction on the number of elements to be stored and byte_offset must be only element-aligned.

Gen12 requirements: !(b) && (c) && !(d). It can store 16-, 32-, 64-, or 128-bytes only. DG2/PVC requirements: It can store such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more).

Definition at line 2288 of file memory.hpp.

◆ block_store() [2/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  acc,
detail::DeviceAccessorOffsetT  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(AccessorT acc, OffsetT byte_offset, // (acc-bs-3) simd<T, N> vals, simd_mask<1> pred, props = {}); This function stores a contiguous memory block to accessor acc and byte_offset with data specified by vals.

If the predicate pred is set to 0, then the store is omitted.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the byte_offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. The alignment requirement may be less strict if stateless memory mode is ON, see block_store(usm_ptr, props) (aka usm-bs-01) for details/requirements.

Restrictions: R1: The target device must be DG2 or PVC (not Gen12).

R2: It can store such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512(only if alignment is 8-bytes or more).

Definition at line 2403 of file memory.hpp.

◆ block_store() [3/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  acc,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(AccessorT acc, simd<T, N> vals, props = {}); // (acc-bs-2) This function stores a contiguous memory block to accessor acc with data specified by vals and implied offset=0.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2. Other properties are ignored. If props specifies the alignment property, then it is ignored because this variant implies zero offset, which means the most favourable 16-byte alignment is used.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Restrictions: there may be some extra restrictions depending on a) stateless memory mode enforcement is ON, b) cache hints are used, c) number of bytes stored is either 16,32,64, or 128. If (b) || !(c), then the target device must be DG2 or PVC (not Gen12). If (a) && !(b), then there is no restriction on the number of elements to be stored.

Gen12 requirements: !(b) && (c). It can store 16-, 32-, 64-, or 128-bytes only. DG2/PVC requirements: It can store such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512.

Definition at line 2357 of file memory.hpp.

◆ block_store() [4/8]

template<typename T , int N, typename AccessorT , typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> && detail::is_device_accessor_with_v<AccessorT, detail::accessor_mode_cap::can_write> > sycl::_V1::ext::intel::esimd::block_store ( AccessorT  acc,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(AccessorT acc, simd<T, N> vals, // (acc-bs-4) simd_mask<1> pred, props = {}); This function stores a contiguous memory block to accessor acc with data specified by vals and implied offset=0.

If the predicate pred is set to 0, then the store is omitted.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2. Other properties are ignored. If props specifies the alignment property, then it is ignored because this variant implies zero offset, which means the most favourable 16-byte alignment is used.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Restrictions: R1: The target device must be DG2 or PVC (not Gen12).

R2: It can store such number of elements depending on the type 'T': for 8-byte data: 1, 2, 3, 4, 8, 16, 32(max for DG2), 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64(max for DG2), or 128; for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128(max for DG2), or 256; for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256(max for DG2), or 512.

Definition at line 2443 of file memory.hpp.

◆ block_store() [5/8]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( T *  ptr,
simd< T, N >  vals,
PropertyListT  = {} 
)

Each of the following block store functions stores a contiguous memory block to the address referenced by the USM pointer 'ptr', or from 'ptr + offset', where 'offset' is the offset in bytes (not in elements!) with data specified by 'vals'.

The parameter 'pred' is the one element predicate. If it is set to 1, then all 'N' elements are stored. Otherwise, the block store operation is a NO-OP. The parameter 'props' specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::cache_hint_L3, esimd::alignment.

void block_store(T* ptr, simd<T, N> vals, props={}); // (usm-bs-1) void block_store(T* ptr, size_t byte_offset, // (usm-bs-2) simd<T, N> vals, props={}); void block_store(T* ptr, simd<T, N> vals, // (usm-bs-3) simd_mask<1> pred, props={}); void block_store(T* ptr, size_t byte_offset, // (usm-bs-4) simd<T, N> vals, simd_mask<1> pred, props={});

void block_store(T* ptr, simd<T, N> vals, props={}); // (usm-bs-1) This function stores a contiguous memory block to USM pointer ptr with data specified by vals.

There may be temporary restrictions depending on L1, L2 cache hints, See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is 16 bytes if props does not specify any L1 or L2 cache hints, and the minimally required element-size alignment otherwise. Note that additional/temporary restrictions may apply (see Restrictions below).

Restrictions - cache hint imposed - temporary: If L1 or L2 cache hint is passed, then: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 2064 of file memory.hpp.

◆ block_store() [6/8]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( T *  ptr,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  = {} 
)

void block_store(T* ptr, simd<T, N> vals, // (usm-bs-3) simd_mask<1> pred, props={}); This function stores a contiguous memory block to USM pointer ptr with data specified by vals.

If the predicate pred is set to 0, then the store is omitted.

There are temporary restrictions. See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is the minimally required element-size alignment. Note that additional/temporary restrictions apply (see Restrictions below).

Restrictions - predicate imposed - temporary: R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 2163 of file memory.hpp.

◆ block_store() [7/8]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( T *  ptr,
size_t  byte_offset,
simd< T, N >  vals,
PropertyListT  props = {} 
)

void block_store(T* ptr, size_t byte_offset, // (usm-bs-2) simd<T, N> vals, props={}); This function stores a contiguous memory block to USM pointer ptr and byte-offset byte_offset with data specified by vals.

There may be temporary restrictions depending on L1, L2 cache hints, See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is 16 bytes if props does not specify any L1 or L2 cache hints, and the minimally required element-size alignment otherwise. Note that additional/temporary restrictions may apply (see Restrictions below).

Restrictions - cache hint imposed - temporary: If L1 or L2 cache hint is passed, then: R1: The pointer plus byte offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 2120 of file memory.hpp.

◆ block_store() [8/8]

template<typename T , int N, typename PropertyListT = ext::oneapi::experimental::detail::empty_properties_t>
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v<PropertyListT> > sycl::_V1::ext::intel::esimd::block_store ( T *  ptr,
size_t  byte_offset,
simd< T, N >  vals,
simd_mask< 1 >  pred,
PropertyListT  props = {} 
)

void block_store(T* ptr, size_t byte_offset, // (usm-bs-4) simd<T, N> vals, simd_mask<1> pred, props={}); This function stores a contiguous memory block to USM pointer ptr and byte-offset byte_offset with data specified by vals.

If the predicate pred is set to 0, then the store is omitted.

There may be temporary restrictions depending on L1, L2 cache hints, See details in the 'Restrictions' section below. The restrictions will be relaxed in the future.

The parameter props specifies the optional compile-time properties of the type esimd::properties and may include esimd::cache_hint_L1, esimd::cache_hint_L2, esimd::alignment. Other properties are ignored.

Cache hints: If props does not specify any L1 or L2 cache hints, then the cache_hint::none value is assumed by default.

Alignment: If props does not specify the 'alignment' property, then the default assumed alignment is 16 bytes if props does not specify any L1 or L2 cache hints and pred is set to 1, and Note that additional/temporary restrictions may apply (see Restrictions below).

Restrictions - cache hint or predicate imposed - temporary: If a predicate, L1 or L2 cache hint is passed, then: R1: The pointer plus byte offset must be at least 4-byte aligned for elements of 4-bytes or smaller and 8-byte aligned for 8-byte elements. R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64; for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64, or 128(only if alignment is 8-bytes or more); for 2-byte data: 2, 4, 6, 8, 16, 32, 64, 128, or 256(only if alignment is 8-bytes or more); for 1-byte data: 4, 8, 12, 16, 32, 64, 128, 256, or 512(only if alignment is 8-bytes or more). R3: The target device must be DG2, PVC or newer GPU.

Definition at line 2212 of file memory.hpp.