DPC++ Runtime
Runtime libraries for oneAPI DPC++
simd_obj_impl.hpp
Go to the documentation of this file.
1 //==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API -------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD vector APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
13 #include <sycl/aspects.hpp>
21 
22 namespace sycl {
23 inline namespace _V1 {
24 namespace ext::intel::esimd {
25 
28 
45 
47 
50 
55  template <typename VT, typename ET = detail::element_type_t<VT>>
56  static constexpr unsigned alignment = alignof(ET);
57 };
58 
63  template <typename VT> static constexpr unsigned alignment = alignof(VT);
64 };
65 
70 template <unsigned N> struct overaligned_tag {
71  static_assert(
73  "Alignment value N for overaligned_tag<N> must be a power of two");
74  template <typename> static constexpr unsigned alignment = N;
75 };
76 
77 inline constexpr element_aligned_tag element_aligned = {};
78 
79 inline constexpr vector_aligned_tag vector_aligned = {};
80 
81 template <unsigned N> inline constexpr overaligned_tag<N> overaligned = {};
82 
84 template <typename T> struct is_simd_flag_type : std::false_type {};
85 
86 template <> struct is_simd_flag_type<element_aligned_tag> : std::true_type {};
87 
88 template <> struct is_simd_flag_type<vector_aligned_tag> : std::true_type {};
89 
90 template <unsigned N>
91 struct is_simd_flag_type<overaligned_tag<N>> : std::true_type {};
92 
95 template <typename T>
96 static inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;
97 
99 
100 namespace detail {
101 
106 struct dqword_element_aligned_tag {
107  template <typename VT, typename ET = detail::element_type_t<VT>>
108  static constexpr unsigned alignment = alignof(ET) > 4 ? alignof(ET) : 4;
109 };
110 
111 inline constexpr dqword_element_aligned_tag dqword_element_aligned = {};
112 
113 // Functions to support efficient simd constructors - avoiding internal loop
114 // over elements.
115 template <class T, int N, size_t... Is>
116 constexpr vector_type_t<T, N> make_vector_impl(const T (&&Arr)[N],
117  std::index_sequence<Is...>) {
118  return vector_type_t<T, N>{Arr[Is]...};
119 }
120 
121 template <class T, int N>
122 constexpr vector_type_t<T, N> make_vector(const T (&&Arr)[N]) {
123  return make_vector_impl<T, N>(std::move(Arr), std::make_index_sequence<N>{});
124 }
125 
126 template <class T, int N, size_t... Is>
127 constexpr auto make_vector_impl(T Base, T Stride, std::index_sequence<Is...>) {
128  if constexpr (std::is_integral_v<T> && N <= 3) {
129  // This sequence is a bit more efficient for integral types and N <= 3.
130  return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
131  } else {
132  using CppT = typename element_type_traits<T>::EnclosingCppT;
133  CppT BaseCpp = Base;
134  CppT StrideCpp = Stride;
135  vector_type_t<CppT, N> VBase = BaseCpp;
136  vector_type_t<CppT, N> VStride = StrideCpp;
137  vector_type_t<CppT, N> VStrideCoef{(CppT)(Is)...};
138  vector_type_t<CppT, N> Result{VBase + VStride * VStrideCoef};
139  return wrapper_type_converter<T>::template to_vector<N>(Result);
140  }
141 }
142 
143 template <class T, int N> constexpr auto make_vector(T Base, T Stride) {
144  return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
145 }
146 
148 
151 
174 template <typename RawTy, int N, class Derived, class SFINAE>
175 #ifndef __SYCL_DEVICE_ONLY__
177 #else
178 class [[__sycl_detail__::__uses_aspects__(
179  sycl::aspect::ext_intel_esimd)]] simd_obj_impl {
180 #endif
182 
183  // For the is_simd_obj_impl_derivative helper to work correctly, all derived
184  // classes must be templated by element type and number of elements. If fewer
185  // template arguments are needed, template aliases can be used
186  // (simd_mask_type).
187  //
188  template <typename, typename> friend class simd_view;
189  template <typename, typename> friend class simd_view_impl;
190  template <typename, int> friend class simd;
191  template <typename, int> friend class simd_mask_impl;
192 
194 
195 public:
197  using element_type = get_vector_element_type<Derived>;
198 
200  using raw_vector_type = vector_type_t<RawTy, N>;
201 
203  using raw_element_type = RawTy;
204 
206  static constexpr int length = N;
207 
208 protected:
210  using Ty = element_type;
211 
212  template <bool UseSet = true>
213  void init_from_array(const Ty (&&Arr)[N]) noexcept {
214  raw_vector_type tmp;
215 
216  if constexpr (is_wrapper_elem_type_v<Ty>) {
217  for (auto I = 0; I < N; ++I) {
218  tmp[I] = bitcast_to_raw_type(Arr[I]);
219  }
220  } else {
221  tmp = make_vector(std::move(Arr));
222  }
223  if constexpr (UseSet) {
224  set(std::move(tmp));
225  } else {
226  M_data = std::move(tmp);
227  }
228  }
229 
230  explicit operator raw_vector_type() const {
231  __esimd_dbg_print(explicit operator raw_vector_type());
232  return data();
233  }
234 
235 private:
236  Derived &cast_this_to_derived() { return reinterpret_cast<Derived &>(*this); }
237  const Derived &cast_this_to_derived() const {
238  return reinterpret_cast<const Derived &>(*this);
239  }
240 
242 
243 public:
246  simd_obj_impl() = default;
247 
250  simd_obj_impl(const simd_obj_impl &other) {
252  set(other.data());
253  }
254 
261  template <class Ty1, typename Derived1>
263  __esimd_dbg_print(simd_obj_impl(const simd_obj_impl... > &other));
264  set(convert_vector<Ty, element_type_t<Derived1>, N>(other.data()));
265  }
266 
271  set(Val);
272  }
273 
282  simd_obj_impl(Ty Base, Ty Step) noexcept {
283  __esimd_dbg_print(simd_obj_impl(Ty Base, Ty Step));
284  M_data = make_vector<Ty, N>(Base, Step);
285  }
286 
292  template <class T1,
293  class = std::enable_if_t<detail::is_valid_simd_elem_type_v<T1>>>
296  M_data = bitcast_to_raw_type(detail::convert_scalar<Ty>(Val));
297  }
298 
303  template <int N1, class = std::enable_if_t<N1 == N>>
304  simd_obj_impl(const Ty (&&Arr)[N1]) noexcept {
305  __esimd_dbg_print(simd_obj_impl(const Ty(&&Arr)[N1]));
306  init_from_array<false /*init M_data w/o using set(...)*/>(std::move(Arr));
307  // It is OK not to mark a write to M_data with __esimd_vstore (via 'set')
308  // here because:
309  // - __esimd_vstore/vload are need only to mark ESIMD_PRIVATE variable
310  // access for the VC BE to generate proper code for them.
311  // - initializers are not allowed for ESIMD_PRIVATE vars, so only the
312  // default ctor can be used for them
313  }
314 
321  template <typename Flags = element_aligned_tag,
322  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
323  simd_obj_impl(const Ty *ptr, Flags = {}) noexcept {
324  __esimd_dbg_print(simd_obj_impl(const Ty *ptr, Flags));
325  copy_from(ptr, Flags{});
326  }
327 
338  template <
339  typename AccessorT, typename Flags = element_aligned_tag,
340  typename = std::enable_if_t<
341  detail::is_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
342  is_simd_flag_type_v<Flags>>>
343  simd_obj_impl(AccessorT acc,
344 #ifdef __ESIMD_FORCE_STATELESS_MEM
345  uint64_t offset,
346 #else
347  uint32_t offset,
348 #endif
349  Flags = {}) noexcept {
350  __esimd_dbg_print(simd_obj_impl(AccessorT acc,
351 #ifdef __ESIMD_FORCE_STATELESS_MEM
352  uint64_t offset,
353 #else
354  uint32_t offset,
355 #endif
356  Flags));
357  copy_from(acc, offset, Flags{});
358  }
359 
361  Derived &operator=(const simd_obj_impl &other) noexcept {
362  set(other.data());
363  return cast_this_to_derived();
364  }
365 
368  template <typename T = simd_obj_impl,
369  typename = std::enable_if_t<T::length == 1>>
370  operator Ty() const {
371  __esimd_dbg_print(operator Ty());
372  return bitcast_to_wrapper_type<Ty>(data()[0]);
373  }
374 
378 #ifndef __SYCL_DEVICE_ONLY__
379  return M_data;
380 #else
381  return __esimd_vload<RawTy, N>(&M_data);
382 #endif
383  }
384 
388  raw_vector_type &data_ref() { return M_data; }
389 
392  Derived read() const { return Derived{data()}; }
393 
396  Derived &write(const Derived &Val) {
397  set(Val.data());
398  return cast_this_to_derived();
399  }
400 
407  void merge(const Derived &Val, const simd_mask_type<N> &Mask) {
408  set(__esimd_wrregion<RawTy, N, N, 0 /*VS*/, N, 1, N>(data(), Val.data(), 0,
409  Mask.data()));
410  }
411 
419  void merge(const Derived &Val1, Derived Val2, const simd_mask_type<N> &Mask) {
420  Val2.merge(Val1, Mask);
421  set(Val2.data());
422  }
423 
430  template <typename EltTy> auto bit_cast_view() &[[clang::lifetimebound]] {
431  using TopRegionTy = compute_format_type_t<Derived, EltTy>;
432  using RetTy = simd_view<Derived, TopRegionTy>;
433  return RetTy{cast_this_to_derived(), TopRegionTy{0}};
434  }
435 
446  template <typename EltTy, int Height, int Width>
447  auto bit_cast_view() &[[clang::lifetimebound]] {
448  using TopRegionTy = compute_format_type_2d_t<Derived, EltTy, Height, Width>;
449  using RetTy = simd_view<Derived, TopRegionTy>;
450  return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}};
451  }
452 
460  template <int Size, int Stride>
462  select(uint16_t Offset = 0) &[[clang::lifetimebound]] {
463  static_assert(Size > 1 || Stride == 1,
464  "Stride must be 1 in single-element region");
465  region1d_t<Ty, Size, Stride> Reg(Offset);
466  return {cast_this_to_derived(), std::move(Reg)};
467  }
468 
476  template <int Size, int Stride>
477  resize_a_simd_type_t<Derived, Size> select(uint16_t Offset = 0) && {
478  static_assert(Size > 1 || Stride == 1,
479  "Stride must be 1 in single-element region");
480  Derived &&Val = std::move(cast_this_to_derived());
481  return __esimd_rdregion<RawTy, N, Size, /*VS*/ 0, Size, Stride>(Val.data(),
482  Offset);
483  }
484 
488  Ty operator[](int i) const { return bitcast_to_wrapper_type<Ty>(data()[i]); }
489 
494  [[clang::lifetimebound]] {
495  return select<1, 1>(i);
496  }
497 
503  template <int Size>
504  resize_a_simd_type_t<Derived, Size>
505  iselect(const simd<uint16_t, Size> &Indices) {
506  vector_type_t<uint16_t, Size> Offsets = Indices.data() * sizeof(RawTy);
507  return __esimd_rdindirect<RawTy, N, Size>(data(), Offsets);
508  }
509 
513  void iupdate(ushort Index, Ty V) {
514  auto Val = data();
515  Val[Index] = bitcast_to_raw_type(V);
516  set(Val);
517  }
518 
524  template <int Size>
525  void iupdate(const simd<uint16_t, Size> &Indices,
526  const resize_a_simd_type_t<Derived, Size> &Val,
527  const simd_mask_type<Size> &Mask) {
528  vector_type_t<uint16_t, Size> Offsets = Indices.data() * sizeof(RawTy);
529  set(__esimd_wrindirect<RawTy, N, Size>(data(), Val.data(), Offsets,
530  Mask.data()));
531  }
532 
536  template <int Rep> resize_a_simd_type_t<Derived, Rep * N> replicate() const {
537  return replicate_w<Rep, N>(0);
538  }
539 
547  template <int Rep, int W>
548  resize_a_simd_type_t<Derived, Rep * W> replicate_w(uint16_t Offset) const {
549  return replicate_vs_w_hs<Rep, 0, W, 1>(Offset);
550  }
551 
561  template <int Rep, int VS, int W>
562  resize_a_simd_type_t<Derived, Rep * W> replicate_vs_w(uint16_t Offset) const {
563  return replicate_vs_w_hs<Rep, VS, W, 1>(Offset);
564  }
565 
614  template <int Rep, int VS, int W, int HS>
615  resize_a_simd_type_t<Derived, Rep * W>
616  replicate_vs_w_hs(uint16_t Offset) const {
617  return __esimd_rdregion<RawTy, N, Rep * W, VS, W, HS, N>(
618  data(), Offset * sizeof(RawTy));
619  }
620 
624  template <typename T1 = Ty,
625  typename = std::enable_if_t<std::is_integral_v<T1>>>
626  uint16_t any() const {
627  return __esimd_any<Ty, N>(data());
628  }
629 
633  template <typename T1 = Ty,
634  typename = std::enable_if_t<std::is_integral_v<T1>>>
635  uint16_t all() const {
636  return __esimd_all<Ty, N>(data());
637  }
638 
639 protected:
642  template <typename RTy, class ElemTy = __raw_t<typename RTy::element_type>>
643  ESIMD_INLINE void writeRegion(RTy Region,
644  const vector_type_t<ElemTy, RTy::length> &Val) {
645 
646  if constexpr (N * sizeof(RawTy) == RTy::length * sizeof(ElemTy))
647  // update the entire vector
648  set(bitcast<RawTy, ElemTy, RTy::length>(Val));
649  else {
650  static_assert(!RTy::Is_2D);
651  // If element type differs, do bitcast conversion first.
652  auto Base = bitcast<ElemTy, RawTy, N>(data());
653  constexpr int BN = (N * sizeof(RawTy)) / sizeof(ElemTy);
654  // Access the region information.
655  constexpr int M = RTy::Size_x;
656  constexpr int Stride = RTy::Stride_x;
657  uint16_t Offset = Region.M_offset_x * sizeof(ElemTy);
658  static_assert(M > 0, "Malformed RHS region.");
659  static_assert(M <= BN, "Attempt to write beyond viewed area: The viewed "
660  "object in LHS does not fit RHS.");
661  // (M > BN) condition is added below to not duplicate the above assert
662  // for big values of M. The assert below is for 'Stride'.
663  static_assert((M > BN) || (M - 1) * Stride < BN,
664  "Malformed RHS region - too big stride.");
665 
666  // Merge and update.
667  auto Merged = __esimd_wrregion<ElemTy, BN, M,
668  /*VS*/ 0, M, Stride>(Base, Val, Offset);
669  // Convert back to the original element type, if needed.
670  set(bitcast<RawTy, ElemTy, BN>(Merged));
671  }
672  }
673 
676  template <typename TR, typename UR,
677  class ElemTy = __raw_t<typename TR::element_type>>
678  ESIMD_INLINE void writeRegion(std::pair<TR, UR> Region,
679  const vector_type_t<ElemTy, TR::length> &Val) {
680  // parent-region type
681  using PaTy = typename shape_type<UR>::type;
682  using BT = __raw_t<typename PaTy::element_type>;
683  constexpr int BN = PaTy::length;
684 
685  if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) {
686  writeRegion(Region.second, bitcast<BT, ElemTy, TR::length>(Val));
687  } else {
688  // Recursively read the base
689  auto Base = readRegion<RawTy, N>(data(), Region.second);
690  // If element type differs, do bitcast conversion first.
691  auto Base1 = bitcast<ElemTy, BT, BN>(Base);
692  constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy);
693 
694  if constexpr (!TR::Is_2D) {
695  // Access the region information.
696  constexpr int M = TR::Size_x;
697  constexpr int Stride = TR::Stride_x;
698  uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy);
699 
700  static_assert(M <= BN1, "Attempt to write beyond viewed area: The "
701  "viewed object in LHS does not fit RHS.");
702  static_assert(M > 0, "Malformed RHS region.");
703  static_assert((M - 1) * Stride < BN,
704  "Malformed RHS region - too big stride.");
705  // Merge and update.
706  Base1 = __esimd_wrregion<ElemTy, BN1, M,
707  /*VS*/ 0, M, Stride>(Base1, Val, Offset);
708  } else {
709  static_assert(std::is_same<ElemTy, BT>::value);
710  // Read columns with non-trivial horizontal stride.
711  constexpr int M = TR::length;
712  constexpr int VS = PaTy::Size_x * TR::Stride_y;
713  constexpr int W = TR::Size_x;
714  constexpr int HS = TR::Stride_x;
715  constexpr int ParentWidth = PaTy::Size_x;
716 
717  // Compute the byte offset for the starting element.
718  uint16_t Offset = static_cast<uint16_t>(
719  (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) *
720  sizeof(ElemTy));
721 
722  static_assert(M <= BN1, "Attempt to write beyond viewed area: The "
723  "viewed object in LHS does not fit RHS.");
724  static_assert(M > 0 && W > 0 && M % W == 0, "Malformed RHS region.");
725  static_assert(W == 0 || ((M / W) - 1) * VS + (W - 1) * HS < BN1,
726  "Malformed RHS region - too big vertical and/or "
727  "horizontal stride.");
728  // Merge and update.
729  Base1 = __esimd_wrregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(
730  Base1, Val, Offset);
731  }
732  // Convert back to the original element type, if needed.
733  auto Merged1 = bitcast<BT, ElemTy, BN1>(Base1);
734  // recursively write it back to the base
735  writeRegion(Region.second, Merged1);
736  }
737  }
738 
739 public:
750  template <typename Flags = element_aligned_tag, int ChunkSize = 32,
751  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
752  ESIMD_INLINE void copy_from(const Ty *addr, Flags = {}) SYCL_ESIMD_FUNCTION;
753 
765  template <typename AccessorT, typename Flags = element_aligned_tag,
766  int ChunkSize = 32,
767  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
768  ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_read, void>
769  copy_from(AccessorT acc,
770 #ifdef __ESIMD_FORCE_STATELESS_MEM
771  uint64_t offset,
772 #else
773  uint32_t offset,
774 #endif
775  Flags = {}) SYCL_ESIMD_FUNCTION;
776 
788  template <typename AccessorT, typename Flags = element_aligned_tag,
789  int ChunkSize = 32,
790  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
791  ESIMD_INLINE std::enable_if_t<
792  detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_read>,
793  void>
794  copy_from(AccessorT acc, uint32_t offset, Flags = {}) SYCL_ESIMD_FUNCTION;
795 
802  template <typename Flags = element_aligned_tag, int ChunkSize = 32,
803  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
804  ESIMD_INLINE void copy_to(Ty *addr, Flags = {}) const SYCL_ESIMD_FUNCTION;
805 
815  template <typename AccessorT, typename Flags = element_aligned_tag,
816  int ChunkSize = 32,
817  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
818  ESIMD_INLINE EnableIfAccessor<AccessorT, accessor_mode_cap::can_write, void>
819  copy_to(AccessorT acc,
820 #ifdef __ESIMD_FORCE_STATELESS_MEM
821  uint64_t offset,
822 #else
823  uint32_t offset,
824 #endif
825  Flags = {}) const SYCL_ESIMD_FUNCTION;
826 
836  template <typename AccessorT, typename Flags = element_aligned_tag,
837  int ChunkSize = 32,
838  typename = std::enable_if_t<is_simd_flag_type_v<Flags>>>
839  ESIMD_INLINE std::enable_if_t<
840  detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_write>,
841  void>
842  copy_to(AccessorT acc, uint32_t offset, Flags = {}) const SYCL_ESIMD_FUNCTION;
843 
844  // Unary operations.
845 
849  template <class T1 = Ty, class = std::enable_if_t<std::is_integral_v<T1>>>
850  Derived operator~() const {
851  return Derived{
852  detail::vector_unary_op<detail::UnaryOp::bit_not, T1, N>(data())};
853  }
854 
861  template <class T1 = Ty, class = std::enable_if_t<std::is_integral_v<T1>>>
862  simd_mask_type<N> operator!() const {
863  return *this == 0;
864  }
865 
866 #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \
867  \
868  \
869  \
870  \
871  \
872  template <class T1, class SimdT, \
873  class = std::enable_if_t<(is_simd_type_v<Derived> == \
874  is_simd_type_v<SimdT>)&&COND>> \
875  Derived &operator OPASSIGN( \
876  const __ESIMD_DNS::simd_obj_impl<T1, N, SimdT> &RHS) { \
877  auto Res = *this BINOP RHS; \
878  using ResT = decltype(Res); \
879  set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
880  length>(Res.data())); \
881  return cast_this_to_derived(); \
882  } \
883  \
884  \
885  \
886  \
887  \
888  \
889  template <class SimdT1, class RegionT1, \
890  class T1 = typename RegionT1::element_type, \
891  class = std::enable_if_t< \
892  (is_simd_type_v<Derived> == \
893  is_simd_type_v<SimdT1>)&&(RegionT1::length == length) && \
894  COND>> \
895  Derived &operator OPASSIGN( \
896  const __ESIMD_NS::simd_view<SimdT1, RegionT1> &RHS) { \
897  auto Res = *this BINOP RHS.read(); \
898  using ResT = decltype(Res); \
899  set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
900  length>(Res.data())); \
901  return cast_this_to_derived(); \
902  } \
903  \
904  \
905  \
906  \
907  template <class T1, class = std::enable_if_t<COND>> \
908  Derived &operator OPASSIGN(T1 RHS) { \
909  if constexpr (is_simd_type_v<Derived>) { \
910  using RHSVecT = __ESIMD_DNS::construct_a_simd_type_t<Derived, T1, N>; \
911  return *this OPASSIGN RHSVecT(RHS); \
912  } else { \
913  return *this OPASSIGN Derived((RawTy)RHS); \
914  } \
915  }
916 
917 // Bitwise operations are defined for simd objects and masks, and both operands
918 // must be integral
919 #define __ESIMD_BITWISE_OP_FILTER \
920  std::is_integral_v<element_type> &&std::is_integral_v<T1>
933 #undef __ESIMD_BITWISE_OP_FILTER
934 
935 // Bit shift operations are defined only for simd objects (not for masks), and
936 // both operands must be integral
937 #define __ESIMD_SHIFT_OP_FILTER \
938  std::is_integral_v<element_type> &&std::is_integral_v<T1> \
939  &&__ESIMD_DNS::is_simd_type_v<Derived>
940 
948 #undef __ESIMD_SHIFT_OP_FILTER
949 
950 // Arithmetic operations are defined only for simd objects, and the second
951 // operand's element type must be vectorizable. This requirement for 'this'
952 // is fulfilled, because otherwise 'this' couldn't have been constructed.
953 #define __ESIMD_ARITH_OP_FILTER \
954  __ESIMD_DNS::is_simd_type_v<Derived> &&__ESIMD_DNS::is_vectorizable_v<T1>
955 
964 #undef __ESIMD_ARITH_OP_FILTER
965 #undef __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN
966 
967  // Getter for the test proxy member, if enabled
968  __ESIMD_DECLARE_TEST_PROXY_ACCESS
969 
970 private:
971  // The underlying data for this vector.
972  raw_vector_type M_data;
973 
974  template <int ChunkSize, typename Flags, typename AccessorT, typename TOffset>
975  ESIMD_INLINE void copy_to_impl(AccessorT acc,
976  TOffset offset) const SYCL_ESIMD_FUNCTION;
977  template <int ChunkSize, typename Flags, typename AccessorT, typename TOffset>
978  ESIMD_INLINE void copy_from_impl(AccessorT acc,
979  TOffset offset) SYCL_ESIMD_FUNCTION;
980 
981 protected:
982  // The test proxy if enabled
983  __ESIMD_DECLARE_TEST_PROXY
984 
985  void set(const raw_vector_type &Val) {
986 #ifndef __SYCL_DEVICE_ONLY__
987  M_data = Val;
988 #else
989  __esimd_vstore<RawTy, N>(&M_data, Val);
990 #endif
991  }
992 };
994 
995 } // namespace detail
996 
997 template <>
998 struct is_simd_flag_type<detail::dqword_element_aligned_tag> : std::true_type {
999 };
1000 } // namespace ext::intel::esimd
1001 } // namespace _V1
1002 } // namespace sycl
This class is a simd_obj_impl specialization representing a simd mask, which is basically a simd_obj_...
This is a base class for all ESIMD simd classes with real storage (simd, simd_mask_impl).
ESIMD_INLINE void writeRegion(std::pair< TR, UR > Region, const vector_type_t< ElemTy, TR::length > &Val)
Write a simd_obj_impl-vector into a nested region of a simd_obj_impl object.
simd_obj_impl(T1 Val) noexcept
Broadcast constructor.
resize_a_simd_type_t< Derived, Size > iselect(const simd< uint16_t, Size > &Indices)
Indirect select - select and extract multiple elements with given variable indices.
resize_a_simd_type_t< Derived, Rep *N > replicate() const
Replicates contents of this vector a number of times into a new vector.
Derived & operator=(const simd_obj_impl &other) noexcept
Copy assignment operator.
ESIMD_INLINE EnableIfAccessor< AccessorT, accessor_mode_cap::can_write, void > copy_to(AccessorT acc, uint32_t offset, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
get_vector_element_type< Derived > element_type
Element type of the derived (user) class.
Derived & write(const Derived &Val)
Replaces the underlying data with the one taken from another object.
Ty operator[](int i) const
Get value of this vector's element.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_read >, void > copy_from(AccessorT acc, uint32_t offset, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
static constexpr int length
The number of elements in this object.
simd_obj_impl(Ty Base, Ty Step) noexcept
Arithmetic progression constructor.
auto bit_cast_view() &
Create a 2-dimensional view of this object.
uint16_t all() const
See if all elements are non-zero.
ESIMD_INLINE EnableIfAccessor< AccessorT, accessor_mode_cap::can_read, void > copy_from(AccessorT acc, uint32_t offset, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
simd_obj_impl(const simd_obj_impl< Ty1, N, Derived1, SFINAE > &other)
Implicit conversion constructor from another simd_obj_impl object.
void iupdate(const simd< uint16_t, Size > &Indices, const resize_a_simd_type_t< Derived, Size > &Val, const simd_mask_type< Size > &Mask)
Indirect update - update multiple elements with given variable indices.
simd_obj_impl(const simd_obj_impl &other)
Copy constructor.
void merge(const Derived &Val, const simd_mask_type< N > &Mask)
"Merges" this object's value with another object: replaces part of the underlying data with the one t...
simd_mask_type< N > operator!() const
Unary logical negation operator, available in all subclasses, but only for integral element types (si...
simd_obj_impl(AccessorT acc, uint32_t offset, Flags={}) noexcept
Accessor-based load constructor.
void iupdate(ushort Index, Ty V)
Update single element with variable index.
RawTy raw_element_type
The element type of the raw storage vector.
ESIMD_INLINE void writeRegion(RTy Region, const vector_type_t< ElemTy, RTy::length > &Val)
Write a simd_obj_impl-vector into a basic region of a simd_obj_impl object.
simd_obj_impl(const Ty(&&Arr)[N1]) noexcept
Rvalue array-based constructor.
void merge(const Derived &Val1, Derived Val2, const simd_mask_type< N > &Mask)
Merges given two objects with a mask and writes resulting data into this object.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_write >, void > copy_to(AccessorT acc, uint32_t offset, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
Derived operator~() const
Per-element bitwise inversion, available in all subclasses, but only for integral element types (simd...
auto bit_cast_view() &
Create a 1-dimensional view of this object.
resize_a_simd_type_t< Derived, Rep *W > replicate_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with VS=0 and HS=1 to replicate a single "dense" (w/o gaps between elem...
ESIMD_INLINE void copy_from(const Ty *addr, Flags={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
ESIMD_INLINE void copy_to(Ty *addr, Flags={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w_hs(uint16_t Offset) const
This function "replicates" a portion of this object's elements into a new object.
__ESIMD_DECLARE_TEST_PROXY void set(const raw_vector_type &Val)
simd_view< Derived, region1d_t< Ty, Size, Stride > > select(uint16_t Offset=0) &
Select elements of this object into a subregion and create a 1D view for for it.
simd_obj_impl(const raw_vector_type &Val)
Implicit conversion constructor from a raw vector object.
resize_a_simd_type_t< Derived, Size > select(uint16_t Offset=0) &&
Select and extract a subregion of this object's elements and return it as a new vector object.
simd_view< Derived, region1d_scalar_t< Ty > > operator[](int i)
Return writable view of a single element.
vector_type_t< RawTy, N > raw_vector_type
The underlying raw storage vector data type.
simd_obj_impl(const Ty *ptr, Flags={}) noexcept
Pointer-based load constructor.
uint16_t any() const
See if any element is non-zero.
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with HS=1 to replicate dense blocks.
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
The main simd vector class.
Definition: simd.hpp:53
#define __esimd_dbg_print(a)
Definition: types.hpp:24
constexpr vector_aligned_tag vector_aligned
static constexpr bool is_simd_flag_type_v
Checks if given type is a simd load/store flag.
constexpr overaligned_tag< N > overaligned
constexpr element_aligned_tag element_aligned
unsigned short ushort
Definition: common.hpp:41
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:95
constexpr alignment_key::value_t< K > alignment
Definition: access.hpp:18
#define __ESIMD_SHIFT_OP_FILTER
#define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND)
#define __ESIMD_BITWISE_OP_FILTER
#define __ESIMD_ARITH_OP_FILTER
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Checks if type is a simd load/store flag.