23 inline namespace _V1 {
24 namespace ext::intel::esimd {
55 template <
typename VT,
typename ET = detail::element_type_t<VT>>
63 template <
typename VT>
static constexpr
unsigned alignment =
alignof(VT);
73 "Alignment value N for overaligned_tag<N> must be a power of two");
74 template <
typename>
static constexpr
unsigned alignment = N;
106 struct dqword_element_aligned_tag {
107 template <
typename VT,
typename ET = detail::element_type_t<VT>>
108 static constexpr
unsigned alignment =
alignof(ET) > 4 ?
alignof(ET) : 4;
111 inline constexpr dqword_element_aligned_tag dqword_element_aligned = {};
115 template <
class T,
int N,
size_t... Is>
116 constexpr vector_type_t<T, N> make_vector_impl(
const T (&&Arr)[N],
117 std::index_sequence<Is...>) {
118 return vector_type_t<T, N>{Arr[Is]...};
121 template <
class T,
int N>
122 constexpr vector_type_t<T, N> make_vector(
const T (&&Arr)[N]) {
123 return make_vector_impl<T, N>(std::move(Arr), std::make_index_sequence<N>{});
126 template <
class T,
int N,
size_t... Is>
127 constexpr
auto make_vector_impl(T Base, T Stride, std::index_sequence<Is...>) {
128 if constexpr (std::is_integral_v<T> && N <= 3) {
130 return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
132 using CppT =
typename element_type_traits<T>::EnclosingCppT;
134 CppT StrideCpp = Stride;
135 vector_type_t<CppT, N> VBase = BaseCpp;
136 vector_type_t<CppT, N> VStride = StrideCpp;
137 vector_type_t<CppT, N> VStrideCoef{(CppT)(Is)...};
138 vector_type_t<CppT, N> Result{VBase + VStride * VStrideCoef};
139 return wrapper_type_converter<T>::template to_vector<N>(Result);
143 template <
class T,
int N> constexpr
auto make_vector(T Base, T Stride) {
144 return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
174 template <
typename RawTy,
int N,
class Derived,
class SFINAE>
175 #ifndef __SYCL_DEVICE_ONLY__
178 class [[__sycl_detail__::__uses_aspects__(
188 template <
typename,
typename>
friend class simd_view;
190 template <
typename,
int>
friend class simd;
212 template <
bool UseSet = true>
213 void init_from_array(
const Ty (&&Arr)[N])
noexcept {
216 if constexpr (is_wrapper_elem_type_v<Ty>) {
217 for (
auto I = 0; I < N; ++I) {
218 tmp[I] = bitcast_to_raw_type(Arr[I]);
221 tmp = make_vector(std::move(Arr));
223 if constexpr (UseSet) {
226 M_data = std::move(tmp);
// Returns a mutable reference to this same object viewed as the Derived type.
// The conversion is a plain reinterpret_cast, so no runtime check is done.
// NOTE(review): presumably Derived inherits from this class (CRTP) - the
// class header is not visible here, confirm before relying on it.
236 Derived &cast_this_to_derived() {
return reinterpret_cast<Derived &
>(*this); }
237 const Derived &cast_this_to_derived()
const {
238 return reinterpret_cast<const Derived &
>(*this);
261 template <
class Ty1,
typename Derived1>
264 set(convert_vector<Ty, element_type_t<Derived1>, N>(other.
data()));
284 M_data = make_vector<Ty, N>(Base, Step);
293 class = std::enable_if_t<detail::is_valid_simd_elem_type_v<T1>>>
296 M_data = bitcast_to_raw_type(detail::convert_scalar<Ty>(Val));
303 template <
int N1,
class = std::enable_if_t<N1 == N>>
306 init_from_array<
false >(std::move(Arr));
322 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
340 typename = std::enable_if_t<
341 detail::is_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
342 is_simd_flag_type_v<Flags>>>
344 #ifdef __ESIMD_FORCE_STATELESS_MEM
351 #ifdef __ESIMD_FORCE_STATELESS_MEM
363 return cast_this_to_derived();
369 typename = std::enable_if_t<T::length == 1>>
370 operator Ty()
const {
372 return bitcast_to_wrapper_type<Ty>(
data()[0]);
378 #ifndef __SYCL_DEVICE_ONLY__
381 return __esimd_vload<RawTy, N>(&M_data);
396 Derived &
write(
const Derived &Val) {
398 return cast_this_to_derived();
407 void merge(
const Derived &Val,
const simd_mask_type<N> &Mask) {
408 set(__esimd_wrregion<RawTy, N, N, 0 /*VS*/, N, 1, N>(
data(), Val.data(), 0,
419 void merge(
const Derived &Val1, Derived Val2,
const simd_mask_type<N> &Mask) {
420 Val2.merge(Val1, Mask);
431 using TopRegionTy = compute_format_type_t<Derived, EltTy>;
433 return RetTy{cast_this_to_derived(), TopRegionTy{0}};
446 template <
typename EltTy,
int Height,
int W
idth>
448 using TopRegionTy = compute_format_type_2d_t<Derived, EltTy, Height, Width>;
450 return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}};
460 template <
int Size,
int Str
ide>
462 select(uint16_t Offset = 0) &[[clang::lifetimebound]] {
463 static_assert(Size > 1 || Stride == 1,
464 "Stride must be 1 in single-element region");
465 region1d_t<Ty, Size, Stride> Reg(Offset);
466 return {cast_this_to_derived(), std::move(Reg)};
476 template <
int Size,
int Str
ide>
477 resize_a_simd_type_t<Derived, Size>
select(uint16_t Offset = 0) && {
478 static_assert(Size > 1 || Stride == 1,
479 "Stride must be 1 in single-element region");
480 Derived &&Val = std::move(cast_this_to_derived());
481 return __esimd_rdregion<RawTy, N, Size, 0, Size, Stride>(Val.data(),
494 [[clang::lifetimebound]] {
495 return select<1, 1>(i);
504 resize_a_simd_type_t<Derived, Size>
506 vector_type_t<uint16_t, Size> Offsets = Indices.
data() *
sizeof(RawTy);
507 return __esimd_rdindirect<RawTy, N, Size>(
data(), Offsets);
515 Val[Index] = bitcast_to_raw_type(V);
526 const resize_a_simd_type_t<Derived, Size> &Val,
527 const simd_mask_type<Size> &Mask) {
528 vector_type_t<uint16_t, Size> Offsets = Indices.
data() *
sizeof(RawTy);
529 set(__esimd_wrindirect<RawTy, N, Size>(
data(), Val.data(), Offsets,
536 template <
int Rep> resize_a_simd_type_t<Derived, Rep * N>
replicate()
const {
537 return replicate_w<Rep, N>(0);
547 template <
int Rep,
int W>
548 resize_a_simd_type_t<Derived, Rep * W>
replicate_w(uint16_t Offset)
const {
549 return replicate_vs_w_hs<Rep, 0, W, 1>(Offset);
561 template <
int Rep,
int VS,
int W>
563 return replicate_vs_w_hs<Rep, VS, W, 1>(Offset);
614 template <
int Rep,
int VS,
int W,
int HS>
615 resize_a_simd_type_t<Derived, Rep * W>
617 return __esimd_rdregion<RawTy, N, Rep * W, VS, W, HS, N>(
618 data(), Offset *
sizeof(RawTy));
624 template <
typename T1 = Ty,
625 typename = std::enable_if_t<std::is_integral_v<T1>>>
627 return __esimd_any<Ty, N>(
data());
633 template <
typename T1 = Ty,
634 typename = std::enable_if_t<std::is_integral_v<T1>>>
636 return __esimd_all<Ty, N>(
data());
642 template <
typename RTy,
class ElemTy = __raw_t<
typename RTy::element_type>>
644 const vector_type_t<ElemTy, RTy::length> &Val) {
646 if constexpr (N *
sizeof(RawTy) ==
RTy::length *
sizeof(ElemTy))
648 set(bitcast<RawTy, ElemTy, RTy::length>(Val));
650 static_assert(!RTy::Is_2D);
652 auto Base = bitcast<ElemTy, RawTy, N>(
data());
653 constexpr
int BN = (N *
sizeof(RawTy)) /
sizeof(ElemTy);
655 constexpr
int M = RTy::Size_x;
656 constexpr
int Stride = RTy::Stride_x;
657 uint16_t Offset = Region.M_offset_x *
sizeof(ElemTy);
658 static_assert(M > 0,
"Malformed RHS region.");
659 static_assert(M <= BN,
"Attempt to write beyond viewed area: The viewed "
660 "object in LHS does not fit RHS.");
663 static_assert((M > BN) || (M - 1) * Stride < BN,
664 "Malformed RHS region - too big stride.");
667 auto Merged = __esimd_wrregion<ElemTy, BN, M,
668 0, M, Stride>(Base, Val, Offset);
670 set(bitcast<RawTy, ElemTy, BN>(Merged));
676 template <
typename TR,
typename UR,
677 class ElemTy = __raw_t<typename TR::element_type>>
679 const vector_type_t<ElemTy, TR::length> &Val) {
681 using PaTy =
typename shape_type<UR>::type;
682 using BT = __raw_t<typename PaTy::element_type>;
685 if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) {
686 writeRegion(Region.second, bitcast<BT, ElemTy, TR::length>(Val));
689 auto Base = readRegion<RawTy, N>(
data(), Region.second);
691 auto Base1 = bitcast<ElemTy, BT, BN>(Base);
692 constexpr
int BN1 = PaTy::Size_in_bytes /
sizeof(ElemTy);
694 if constexpr (!TR::Is_2D) {
696 constexpr
int M = TR::Size_x;
697 constexpr
int Stride = TR::Stride_x;
698 uint16_t Offset = Region.first.M_offset_x *
sizeof(ElemTy);
700 static_assert(M <= BN1,
"Attempt to write beyond viewed area: The "
701 "viewed object in LHS does not fit RHS.");
702 static_assert(M > 0,
"Malformed RHS region.");
703 static_assert((M - 1) * Stride < BN,
704 "Malformed RHS region - too big stride.");
706 Base1 = __esimd_wrregion<ElemTy, BN1, M,
707 0, M, Stride>(Base1, Val, Offset);
709 static_assert(std::is_same<ElemTy, BT>::value);
712 constexpr
int VS = PaTy::Size_x * TR::Stride_y;
713 constexpr
int W = TR::Size_x;
714 constexpr
int HS = TR::Stride_x;
715 constexpr
int ParentWidth = PaTy::Size_x;
718 uint16_t Offset =
static_cast<uint16_t
>(
719 (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) *
722 static_assert(M <= BN1,
"Attempt to write beyond viewed area: The "
723 "viewed object in LHS does not fit RHS.");
724 static_assert(M > 0 && W > 0 && M % W == 0,
"Malformed RHS region.");
725 static_assert(W == 0 || ((M / W) - 1) * VS + (W - 1) * HS < BN1,
726 "Malformed RHS region - too big vertical and/or "
727 "horizontal stride.");
729 Base1 = __esimd_wrregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(
733 auto Merged1 = bitcast<BT, ElemTy, BN1>(Base1);
751 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
752 ESIMD_INLINE
void copy_from(
const Ty *addr, Flags = {}) SYCL_ESIMD_FUNCTION;
767 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
768 ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_read, void>
770 #ifdef __ESIMD_FORCE_STATELESS_MEM
775 Flags = {}) SYCL_ESIMD_FUNCTION;
790 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
791 ESIMD_INLINE std::enable_if_t<
792 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_read>,
794 copy_from(AccessorT acc, uint32_t offset, Flags = {}) SYCL_ESIMD_FUNCTION;
803 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
804 ESIMD_INLINE
void copy_to(Ty *addr, Flags = {})
const SYCL_ESIMD_FUNCTION;
817 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
818 ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_write, void>
820 #ifdef __ESIMD_FORCE_STATELESS_MEM
825 Flags = {})
const SYCL_ESIMD_FUNCTION;
838 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
839 ESIMD_INLINE std::enable_if_t<
840 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_write>,
842 copy_to(AccessorT acc, uint32_t offset, Flags = {})
const SYCL_ESIMD_FUNCTION;
849 template <
class T1 = Ty,
class = std::enable_if_t<std::is_
integral_v<T1>>>
852 detail::vector_unary_op<detail::UnaryOp::bit_not, T1, N>(
data())};
861 template <
class T1 = Ty,
class = std::enable_if_t<std::is_
integral_v<T1>>>
866 #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \
872 template <class T1, class SimdT, \
873 class = std::enable_if_t<(is_simd_type_v<Derived> == \
874 is_simd_type_v<SimdT>)&&COND>> \
875 Derived &operator OPASSIGN( \
876 const __ESIMD_DNS::simd_obj_impl<T1, N, SimdT> &RHS) { \
877 auto Res = *this BINOP RHS; \
878 using ResT = decltype(Res); \
879 set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
880 length>(Res.data())); \
881 return cast_this_to_derived(); \
889 template <class SimdT1, class RegionT1, \
890 class T1 = typename RegionT1::element_type, \
891 class = std::enable_if_t< \
892 (is_simd_type_v<Derived> == \
893 is_simd_type_v<SimdT1>)&&(RegionT1::length == length) && \
895 Derived &operator OPASSIGN( \
896 const __ESIMD_NS::simd_view<SimdT1, RegionT1> &RHS) { \
897 auto Res = *this BINOP RHS.read(); \
898 using ResT = decltype(Res); \
899 set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
900 length>(Res.data())); \
901 return cast_this_to_derived(); \
907 template <class T1, class = std::enable_if_t<COND>> \
908 Derived &operator OPASSIGN(T1 RHS) { \
909 if constexpr (is_simd_type_v<Derived>) { \
910 using RHSVecT = __ESIMD_DNS::construct_a_simd_type_t<Derived, T1, N>; \
911 return *this OPASSIGN RHSVecT(RHS); \
913 return *this OPASSIGN Derived((RawTy)RHS); \
// Compile-time condition: both this object's element_type and the right-hand
// side element type T1 must be integral.
// NOTE(review): presumably passed as the COND argument of
// __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN - the use sites are not visible here.
919 #define __ESIMD_BITWISE_OP_FILTER \
920 std::is_integral_v<element_type> &&std::is_integral_v<T1>
933 #undef __ESIMD_BITWISE_OP_FILTER
// Compile-time condition: element_type and T1 must both be integral, and
// additionally Derived must satisfy is_simd_type_v.
// NOTE(review): presumably passed as the COND argument of
// __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN - the use sites are not visible here.
937 #define __ESIMD_SHIFT_OP_FILTER \
938 std::is_integral_v<element_type> &&std::is_integral_v<T1> \
939 &&__ESIMD_DNS::is_simd_type_v<Derived>
948 #undef __ESIMD_SHIFT_OP_FILTER
// Compile-time condition: Derived must satisfy is_simd_type_v and T1 must
// satisfy is_vectorizable_v.
// NOTE(review): presumably passed as the COND argument of
// __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN - the use sites are not visible here.
953 #define __ESIMD_ARITH_OP_FILTER \
954 __ESIMD_DNS::is_simd_type_v<Derived> &&__ESIMD_DNS::is_vectorizable_v<T1>
964 #undef __ESIMD_ARITH_OP_FILTER
965 #undef __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN
968 __ESIMD_DECLARE_TEST_PROXY_ACCESS
974 template <
int ChunkSize,
typename Flags,
typename AccessorT,
typename TOffset>
975 ESIMD_INLINE
void copy_to_impl(AccessorT acc,
976 TOffset offset)
const SYCL_ESIMD_FUNCTION;
977 template <
int ChunkSize,
typename Flags,
typename AccessorT,
typename TOffset>
978 ESIMD_INLINE
void copy_from_impl(AccessorT acc,
979 TOffset offset) SYCL_ESIMD_FUNCTION;
983 __ESIMD_DECLARE_TEST_PROXY
986 #ifndef __SYCL_DEVICE_ONLY__
989 __esimd_vstore<RawTy, N>(&M_data, Val);
This class is a simd_obj_impl specialization representing a simd mask, which is basically a simd_obj_...
This is a base class for all ESIMD simd classes with real storage (simd, simd_mask_impl).
ESIMD_INLINE void writeRegion(std::pair< TR, UR > Region, const vector_type_t< ElemTy, TR::length > &Val)
Write a simd_obj_impl-vector into a nested region of a simd_obj_impl object.
simd_obj_impl(T1 Val) noexcept
Broadcast constructor.
resize_a_simd_type_t< Derived, Size > iselect(const simd< uint16_t, Size > &Indices)
Indirect select - select and extract multiple elements with given variable indices.
resize_a_simd_type_t< Derived, Rep *N > replicate() const
Replicates contents of this vector a number of times into a new vector.
Derived & operator=(const simd_obj_impl &other) noexcept
Copy assignment operator.
ESIMD_INLINE EnableIfAccessor< AccessorT, accessor_mode_cap::can_write, void > copy_to(AccessorT acc, uint32_t offset, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
get_vector_element_type< Derived > element_type
Element type of the derived (user) class.
raw_vector_type & data_ref()
Derived & write(const Derived &Val)
Replaces the underlying data with the one taken from another object.
Ty operator[](int i) const
Get value of this vector's element.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_read >, void > copy_from(AccessorT acc, uint32_t offset, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
static constexpr int length
The number of elements in this object.
simd_obj_impl(Ty Base, Ty Step) noexcept
Arithmetic progression constructor.
auto bit_cast_view() &
Create a 2-dimensional view of this object.
uint16_t all() const
See if all elements are non-zero.
ESIMD_INLINE EnableIfAccessor< AccessorT, accessor_mode_cap::can_read, void > copy_from(AccessorT acc, uint32_t offset, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
simd_obj_impl(const simd_obj_impl< Ty1, N, Derived1, SFINAE > &other)
Implicit conversion constructor from another simd_obj_impl object.
void iupdate(const simd< uint16_t, Size > &Indices, const resize_a_simd_type_t< Derived, Size > &Val, const simd_mask_type< Size > &Mask)
Indirect update - update multiple elements with given variable indices.
simd_obj_impl(const simd_obj_impl &other)
Copy constructor.
void merge(const Derived &Val, const simd_mask_type< N > &Mask)
"Merges" this object's value with another object: replaces part of the underlying data with the one t...
simd_mask_type< N > operator!() const
Unary logical negation operator, available in all subclasses, but only for integral element types (si...
simd_obj_impl(AccessorT acc, uint32_t offset, Flags={}) noexcept
Accessor-based load constructor.
simd_obj_impl()=default
Default constructor.
raw_vector_type data() const
void iupdate(ushort Index, Ty V)
Update single element with variable index.
RawTy raw_element_type
The element type of the raw storage vector.
ESIMD_INLINE void writeRegion(RTy Region, const vector_type_t< ElemTy, RTy::length > &Val)
Write a simd_obj_impl-vector into a basic region of a simd_obj_impl object.
simd_obj_impl(const Ty(&&Arr)[N1]) noexcept
Rvalue array-based constructor.
void merge(const Derived &Val1, Derived Val2, const simd_mask_type< N > &Mask)
Merges given two objects with a mask and writes resulting data into this object.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_write >, void > copy_to(AccessorT acc, uint32_t offset, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
Derived operator~() const
Per-element bitwise inversion, available in all subclasses, but only for integral element types (simd...
auto bit_cast_view() &
Create a 1-dimensional view of this object.
resize_a_simd_type_t< Derived, Rep *W > replicate_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with VS=0 and HS=1 to replicate a single "dense" (w/o gaps between elem...
ESIMD_INLINE void copy_from(const Ty *addr, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
ESIMD_INLINE void copy_to(Ty *addr, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w_hs(uint16_t Offset) const
This function "replicates" a portion of this object's elements into a new object.
__ESIMD_DECLARE_TEST_PROXY void set(const raw_vector_type &Val)
simd_view< Derived, region1d_t< Ty, Size, Stride > > select(uint16_t Offset=0) &
Select elements of this object into a subregion and create a 1D view for it.
simd_obj_impl(const raw_vector_type &Val)
Implicit conversion constructor from a raw vector object.
resize_a_simd_type_t< Derived, Size > select(uint16_t Offset=0) &&
Select and extract a subregion of this object's elements and return it as a new vector object.
simd_view< Derived, region1d_scalar_t< Ty > > operator[](int i)
Return writable view of a single element.
vector_type_t< RawTy, N > raw_vector_type
The underlying raw storage vector data type.
simd_obj_impl(const Ty *ptr, Flags={}) noexcept
Pointer-based load constructor.
uint16_t any() const
See if any element is non-zero.
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with HS=1 to replicate dense blocks.
Base class for "simd view" types.
This class represents a reference to a sub-region of a base simd object.
The main simd vector class.
#define __esimd_dbg_print(a)
constexpr vector_aligned_tag vector_aligned
static constexpr bool is_simd_flag_type_v
Checks if given type is a simd load/store flag.
constexpr overaligned_tag< N > overaligned
constexpr element_aligned_tag element_aligned
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
constexpr alignment_key::value_t< K > alignment
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
#define __ESIMD_SHIFT_OP_FILTER
#define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND)
#define __ESIMD_BITWISE_OP_FILTER
#define __ESIMD_ARITH_OP_FILTER
_Abi const simd< _Tp, _Abi > & noexcept
element_aligned_tag type.
static constexpr unsigned alignment
Checks if type is a simd load/store flag.
static constexpr unsigned alignment
static constexpr unsigned alignment