23 namespace ext::intel::esimd {
54 template <
typename VT,
typename ET = detail::element_type_t<VT>>
62 template <
typename VT>
static constexpr
unsigned alignment =
alignof(VT);
72 "Alignment value N for overaligned_tag<N> must be a power of two");
73 template <
typename>
static constexpr
unsigned alignment = N;
105 struct dqword_element_aligned_tag {
106 template <
typename VT,
typename ET = detail::element_type_t<VT>>
107 static constexpr
unsigned alignment =
alignof(ET) > 4 ?
alignof(ET) : 4;
110 inline constexpr dqword_element_aligned_tag dqword_element_aligned = {};
114 template <
class T,
int N,
size_t... Is>
116 std::index_sequence<Is...>) {
120 template <
class T,
int N>
122 return make_vector_impl<T, N>(std::move(Arr), std::make_index_sequence<N>{});
125 template <
class T,
int N,
size_t... Is>
127 std::index_sequence<Is...>) {
131 template <
class T,
int N>
133 return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
163 template <
typename RawTy,
int N,
class Derived,
class SFINAE>
172 template <
typename,
typename>
friend class simd_view;
174 template <
typename,
int>
friend class simd;
190 static constexpr
int length = N;
196 template <
bool UseSet = true>
197 void init_from_array(
const Ty (&&Arr)[N]) noexcept {
200 if constexpr (is_wrapper_elem_type_v<Ty>) {
201 for (
auto I = 0; I < N; ++I) {
202 tmp[I] = bitcast_to_raw_type(Arr[I]);
205 tmp = make_vector(std::move(Arr));
207 if constexpr (UseSet) {
210 M_data = std::move(tmp);
214 explicit operator raw_vector_type()
const {
220 Derived &cast_this_to_derived() {
return reinterpret_cast<Derived &
>(*this); }
221 const Derived &cast_this_to_derived()
const {
222 return reinterpret_cast<const Derived &
>(*this);
230 simd_obj_impl() =
default;
245 template <
class Ty1,
typename Derived1>
248 set(convert_vector<Ty, element_type_t<Derived1>, N>(other.
data()));
266 if constexpr (is_wrapper_elem_type_v<Ty> || !std::is_integral_v<Ty>) {
267 for (
int i = 0; i < N; ++i) {
268 M_data[i] = bitcast_to_raw_type(Val);
269 Val = binary_op<BinOp::add, Ty>(Val, Step);
272 M_data = make_vector<Ty, N>(Val, Step);
282 class = std::enable_if_t<detail::is_valid_simd_elem_type_v<T1>>>
285 M_data = bitcast_to_raw_type(detail::convert_scalar<Ty>(Val));
292 template <
int N1,
class = std::enable_if_t<N1 == N>>
295 init_from_array<
false >(std::move(Arr));
311 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
314 copy_from(ptr, Flags{});
329 typename = std::enable_if_t<
330 detail::is_sycl_accessor_with<AccessorT, accessor_mode_cap::can_read,
331 sycl::access::target::device>::value &&
332 is_simd_flag_type_v<Flags>>>
335 copy_from(acc, offset, Flags{});
340 template <
typename T = simd_obj_impl,
341 typename = std::enable_if_t<T::length == 1>>
342 operator Ty()
const {
344 return bitcast_to_wrapper_type<Ty>(data()[0]);
350 #ifndef __SYCL_DEVICE_ONLY__
353 return __esimd_vload<RawTy, N>(&M_data);
365 "commit is deprecated and will be removed in a future release")
370 Derived
read()
const {
return Derived{data()}; }
374 Derived &
write(
const Derived &Val) {
376 return cast_this_to_derived();
385 void merge(
const Derived &Val,
const simd_mask_type<N> &Mask) {
386 set(__esimd_wrregion<RawTy, N, N, 0 /*VS*/, N, 1, N>(data(), Val.data(), 0,
397 void merge(
const Derived &Val1, Derived Val2,
const simd_mask_type<N> &Mask) {
398 Val2.merge(Val1, Mask);
409 using TopRegionTy = compute_format_type_t<Derived, EltTy>;
411 return RetTy{cast_this_to_derived(), TopRegionTy{0}};
424 template <
typename EltTy,
int Height,
int W
idth>
426 using TopRegionTy = compute_format_type_2d_t<Derived, EltTy, Height, Width>;
428 return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}};
438 template <
int Size,
int Str
ide>
440 select(uint16_t Offset = 0) &[[clang::lifetimebound]] {
441 static_assert(Size > 1 || Stride == 1,
442 "Stride must be 1 in single-element region");
443 region1d_t<Ty, Size, Stride> Reg(Offset);
444 return {cast_this_to_derived(), std::move(Reg)};
454 template <
int Size,
int Str
ide>
455 resize_a_simd_type_t<Derived, Size>
select(uint16_t Offset = 0) && {
456 static_assert(Size > 1 || Stride == 1,
457 "Stride must be 1 in single-element region");
458 Derived &&Val = std::move(cast_this_to_derived());
459 return __esimd_rdregion<RawTy, N, Size, 0, Size, Stride>(Val.data(),
466 Ty
operator[](
int i)
const {
return bitcast_to_wrapper_type<Ty>(data()[i]); }
472 [[clang::lifetimebound]] {
473 return select<1, 1>(i);
482 resize_a_simd_type_t<Derived, Size>
484 vector_type_t<uint16_t, Size> Offsets = Indices.data() *
sizeof(RawTy);
485 return __esimd_rdindirect<RawTy, N, Size>(data(), Offsets);
493 Val[Index] = bitcast_to_raw_type(V);
504 const resize_a_simd_type_t<Derived, Size> &Val,
505 const simd_mask_type<Size> &Mask) {
506 vector_type_t<uint16_t, Size> Offsets = Indices.data() *
sizeof(RawTy);
507 set(__esimd_wrindirect<RawTy, N, Size>(data(), Val.data(), Offsets,
514 template <
int Rep> resize_a_simd_type_t<Derived, Rep * N>
replicate()
const {
515 return replicate_w<Rep, N>(0);
525 template <
int Rep,
int W>
526 resize_a_simd_type_t<Derived, Rep * W>
replicate_w(uint16_t Offset)
const {
527 return replicate_vs_w_hs<Rep, 0, W, 1>(Offset);
539 template <
int Rep,
int VS,
int W>
541 return replicate_vs_w_hs<Rep, VS, W, 1>(Offset);
592 template <
int Rep,
int VS,
int W,
int HS>
593 resize_a_simd_type_t<Derived, Rep * W>
595 return __esimd_rdregion<RawTy, N, Rep * W, VS, W, HS, N>(
596 data(), Offset *
sizeof(RawTy));
602 template <
typename T1 = Ty,
603 typename = std::enable_if_t<std::is_integral_v<T1>>>
605 return __esimd_any<Ty, N>(data());
611 template <
typename T1 = Ty,
612 typename = std::enable_if_t<std::is_integral_v<T1>>>
614 return __esimd_all<Ty, N>(data());
620 template <
typename RTy,
class ElemTy = __raw_t<
typename RTy::element_type>>
622 const vector_type_t<ElemTy, RTy::length> &Val) {
624 if constexpr (N *
sizeof(RawTy) == RTy::length *
sizeof(ElemTy))
626 set(bitcast<RawTy, ElemTy, RTy::length>(Val));
628 static_assert(!RTy::Is_2D);
630 auto Base = bitcast<ElemTy, RawTy, N>(data());
631 constexpr
int BN = (N *
sizeof(RawTy)) /
sizeof(ElemTy);
633 constexpr
int M = RTy::Size_x;
634 constexpr
int Stride = RTy::Stride_x;
635 uint16_t Offset = Region.M_offset_x *
sizeof(ElemTy);
638 auto Merged = __esimd_wrregion<ElemTy, BN, M,
639 0, M, Stride>(Base, Val, Offset);
641 set(bitcast<RawTy, ElemTy, BN>(Merged));
647 template <
typename TR,
typename UR,
648 class ElemTy = __raw_t<typename TR::element_type>>
650 const vector_type_t<ElemTy, TR::length> &Val) {
652 using PaTy =
typename shape_type<UR>::type;
653 using BT = __raw_t<typename PaTy::element_type>;
654 constexpr
int BN = PaTy::length;
656 if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) {
657 writeRegion(Region.second, bitcast<BT, ElemTy, TR::length>(Val));
660 auto Base = readRegion<RawTy, N>(data(), Region.second);
662 auto Base1 = bitcast<ElemTy, BT, BN>(Base);
663 constexpr
int BN1 = PaTy::Size_in_bytes /
sizeof(ElemTy);
665 if constexpr (!TR::Is_2D) {
667 constexpr
int M = TR::Size_x;
668 constexpr
int Stride = TR::Stride_x;
669 uint16_t Offset = Region.first.M_offset_x *
sizeof(ElemTy);
672 Base1 = __esimd_wrregion<ElemTy, BN1, M,
673 0, M, Stride>(Base1, Val, Offset);
675 static_assert(std::is_same<ElemTy, BT>::value);
677 constexpr
int M = TR::length;
678 constexpr
int VS = PaTy::Size_x * TR::Stride_y;
679 constexpr
int W = TR::Size_x;
680 constexpr
int HS = TR::Stride_x;
681 constexpr
int ParentWidth = PaTy::Size_x;
684 uint16_t Offset =
static_cast<uint16_t
>(
685 (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) *
689 Base1 = __esimd_wrregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(
693 auto Merged1 = bitcast<BT, ElemTy, BN1>(Base1);
695 writeRegion(Region.second, Merged1);
711 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
712 ESIMD_INLINE
void copy_from(
const Ty *addr, Flags = {}) SYCL_ESIMD_FUNCTION;
727 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
728 ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_read,
729 sycl::access::target::device,
void>
730 copy_from(AccessorT acc, uint32_t offset, Flags = {}) SYCL_ESIMD_FUNCTION;
739 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
740 ESIMD_INLINE
void copy_to(Ty *addr, Flags = {})
const SYCL_ESIMD_FUNCTION;
753 typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
754 ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_write,
755 sycl::access::target::device,
void>
756 copy_to(AccessorT acc, uint32_t offset, Flags = {})
const SYCL_ESIMD_FUNCTION;
763 template <
class T1 = Ty,
class = std::enable_if_t<std::is_
integral_v<T1>>>
766 detail::vector_unary_op<detail::UnaryOp::bit_not, T1, N>(data())};
775 template <
class T1 = Ty,
class = std::enable_if_t<std::is_
integral_v<T1>>>
780 #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \
786 template <class T1, class SimdT, \
787 class = std::enable_if_t<(is_simd_type_v<Derived> == \
788 is_simd_type_v<SimdT>)&&COND>> \
789 Derived &operator OPASSIGN( \
790 const __ESIMD_DNS::simd_obj_impl<T1, N, SimdT> &RHS) { \
791 auto Res = *this BINOP RHS; \
792 using ResT = decltype(Res); \
793 set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
794 length>(Res.data())); \
795 return cast_this_to_derived(); \
803 template <class SimdT1, class RegionT1, \
804 class T1 = typename RegionT1::element_type, \
805 class = std::enable_if_t< \
806 (is_simd_type_v<Derived> == \
807 is_simd_type_v<SimdT1>)&&(RegionT1::length == length) && \
809 Derived &operator OPASSIGN( \
810 const __ESIMD_NS::simd_view<SimdT1, RegionT1> &RHS) { \
811 auto Res = *this BINOP RHS.read(); \
812 using ResT = decltype(Res); \
813 set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
814 length>(Res.data())); \
815 return cast_this_to_derived(); \
821 template <class T1, class = std::enable_if_t<COND>> \
822 Derived &operator OPASSIGN(T1 RHS) { \
823 if constexpr (is_simd_type_v<Derived>) { \
824 using RHSVecT = __ESIMD_DNS::construct_a_simd_type_t<Derived, T1, N>; \
825 return *this OPASSIGN RHSVecT(RHS); \
827 return *this OPASSIGN Derived((RawTy)RHS); \
833 #define __ESIMD_BITWISE_OP_FILTER \
834 std::is_integral_v<element_type> &&std::is_integral_v<T1>
847 #undef __ESIMD_BITWISE_OP_FILTER
851 #define __ESIMD_SHIFT_OP_FILTER \
852 std::is_integral_v<element_type> &&std::is_integral_v<T1> \
853 &&__ESIMD_DNS::is_simd_type_v<Derived>
862 #undef __ESIMD_SHIFT_OP_FILTER
867 #define __ESIMD_ARITH_OP_FILTER \
868 __ESIMD_DNS::is_simd_type_v<Derived> &&__ESIMD_DNS::is_vectorizable_v<T1>
878 #undef __ESIMD_ARITH_OP_FILTER
879 #undef __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN
882 __ESIMD_DECLARE_TEST_PROXY_ACCESS
890 __ESIMD_DECLARE_TEST_PROXY
893 #ifndef __SYCL_DEVICE_ONLY__
896 __esimd_vstore<RawTy, N>(&M_data, Val);