DPC++ Runtime
Runtime libraries for oneAPI DPC++
simd_obj_impl.hpp
Go to the documentation of this file.
1 //==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API -------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD vector APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
13 #include <sycl/aspects.hpp>
21 
22 namespace sycl {
23 inline namespace _V1 {
24 namespace ext::intel::esimd {
25 
28 
45 
47 
50 
55  template <typename VT, typename ET = detail::element_type_t<VT>>
56  static constexpr unsigned alignment = alignof(ET);
57 };
58 
63  template <typename VT> static constexpr unsigned alignment = alignof(VT);
64 };
65 
70 template <unsigned N> struct overaligned_tag {
71  static_assert(
73  "Alignment value N for overaligned_tag<N> must be a power of two");
74  template <typename> static constexpr unsigned alignment = N;
75 };
76 
77 inline constexpr element_aligned_tag element_aligned = {};
78 
79 inline constexpr vector_aligned_tag vector_aligned = {};
80 
81 template <unsigned N> inline constexpr overaligned_tag<N> overaligned = {};
82 
84 template <typename T> struct is_simd_flag_type : std::false_type {};
85 
86 template <> struct is_simd_flag_type<element_aligned_tag> : std::true_type {};
87 
88 template <> struct is_simd_flag_type<vector_aligned_tag> : std::true_type {};
89 
90 template <unsigned N>
91 struct is_simd_flag_type<overaligned_tag<N>> : std::true_type {};
92 
95 template <typename T>
96 static inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;
97 
99 
100 namespace detail {
101 
106 struct dqword_element_aligned_tag {
107  template <typename VT, typename ET = detail::element_type_t<VT>>
108  static constexpr unsigned alignment = alignof(ET) > 4 ? alignof(ET) : 4;
109 };
110 
111 inline constexpr dqword_element_aligned_tag dqword_element_aligned = {};
112 
113 // Functions to support efficient simd constructors - avoiding internal loop
114 // over elements.
115 template <class T, int N, size_t... Is>
116 constexpr vector_type_t<T, N> make_vector_impl(const T (&&Arr)[N],
117  std::index_sequence<Is...>) {
118  return vector_type_t<T, N>{Arr[Is]...};
119 }
120 
121 template <class T, int N>
122 constexpr vector_type_t<T, N> make_vector(const T (&&Arr)[N]) {
123  return make_vector_impl<T, N>(std::move(Arr), std::make_index_sequence<N>{});
124 }
125 
126 template <class T, int N, size_t... Is>
127 constexpr auto make_vector_impl(T Base, T Stride, std::index_sequence<Is...>) {
128  if constexpr (std::is_integral_v<T> && N <= 3) {
129  // This sequence is a bit more efficient for integral types and N <= 3.
130  return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
131  } else {
132  using CppT = typename element_type_traits<T>::EnclosingCppT;
133  CppT BaseCpp = Base;
134  CppT StrideCpp = Stride;
135  vector_type_t<CppT, N> VBase = BaseCpp;
136  vector_type_t<CppT, N> VStride = StrideCpp;
137  vector_type_t<CppT, N> VStrideCoef{(CppT)(Is)...};
138  vector_type_t<CppT, N> Result{VBase + VStride * VStrideCoef};
139  return wrapper_type_converter<T>::template to_vector<N>(Result);
140  }
141 }
142 
143 template <class T, int N> constexpr auto make_vector(T Base, T Stride) {
144  return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
145 }
146 
148 
151 
174 template <typename RawTy, int N, class Derived, class SFINAE>
175 #ifndef __SYCL_DEVICE_ONLY__
177 #else
178 class [[__sycl_detail__::__uses_aspects__(
179  sycl::aspect::ext_intel_esimd)]] simd_obj_impl {
180 #endif
182 
183  // For the is_simd_obj_impl_derivative helper to work correctly, all derived
184  // classes must be templated by element type and number of elements. If fewer
185  // template arguments are needed, template aliases can be used
186  // (simd_mask_type).
187  //
188  template <typename, typename> friend class simd_view;
189  template <typename, typename> friend class simd_view_impl;
190  template <typename, int> friend class simd;
191  template <typename, int> friend class simd_mask_impl;
192 
194 
195 public:
197  using element_type = get_vector_element_type<Derived>;
198 
200  using raw_vector_type = vector_type_t<RawTy, N>;
201 
203  using raw_element_type = RawTy;
204 
206  static constexpr int length = N;
207 
208 protected:
210  using Ty = element_type;
211 
212  template <bool UseSet = true>
213  void init_from_array(const Ty (&&Arr)[N]) noexcept {
214  raw_vector_type tmp;
215 
216  if constexpr (is_wrapper_elem_type_v<Ty>) {
217  for (auto I = 0; I < N; ++I) {
218  tmp[I] = bitcast_to_raw_type(Arr[I]);
219  }
220  } else {
221  tmp = make_vector(std::move(Arr));
222  }
223  if constexpr (UseSet) {
224  set(std::move(tmp));
225  } else {
226  M_data = std::move(tmp);
227  }
228  }
229 
230  explicit operator raw_vector_type() const {
231  __esimd_dbg_print(explicit operator raw_vector_type());
232  return data();
233  }
234 
235 private:
236  Derived &cast_this_to_derived() { return reinterpret_cast<Derived &>(*this); }
237  const Derived &cast_this_to_derived() const {
238  return reinterpret_cast<const Derived &>(*this);
239  }
240 
242 
243 public:
246  simd_obj_impl() = default;
247 
250  simd_obj_impl(const simd_obj_impl &other) {
252  set(other.data());
253  }
254 
261  template <class Ty1, typename Derived1>
263  __esimd_dbg_print(simd_obj_impl(const simd_obj_impl... > &other));
264  set(convert_vector<Ty, element_type_t<Derived1>, N>(other.data()));
265  }
266 
271  set(Val);
272  }
273 
282  simd_obj_impl(Ty Base, Ty Step) noexcept {
283  __esimd_dbg_print(simd_obj_impl(Ty Base, Ty Step));
284  M_data = make_vector<Ty, N>(Base, Step);
285  }
286 
292  template <class T1,
293  class = std::enable_if_t<detail::is_valid_simd_elem_type_v<T1>>>
296  M_data = bitcast_to_raw_type(detail::convert_scalar<Ty>(Val));
297  }
298 
303  template <int N1, class = std::enable_if_t<N1 == N>>
304  simd_obj_impl(const Ty (&&Arr)[N1]) noexcept {
305  __esimd_dbg_print(simd_obj_impl(const Ty(&&Arr)[N1]));
306  init_from_array<false /*init M_data w/o using set(...)*/>(std::move(Arr));
307  // It is OK not to mark a write to M_data with __esimd_vstore (via 'set')
308  // here because:
309  // - __esimd_vstore/vload are need only to mark ESIMD_PRIVATE variable
310  // access for the VC BE to generate proper code for them.
311  // - initializers are not allowed for ESIMD_PRIVATE vars, so only the
312  // default ctor can be used for them
313  }
314 
321  template <typename Flags,
322  typename std::enable_if_t<is_simd_flag_type_v<Flags>, bool> = true>
323  simd_obj_impl(const Ty *ptr, Flags) noexcept {
324  __esimd_dbg_print(simd_obj_impl(const Ty *ptr, Flags));
325  copy_from(ptr, Flags{});
326  }
327 
335  template <typename PropertyListT = oneapi::experimental::empty_properties_t,
336  typename std::enable_if_t<
337  ext::oneapi::experimental::is_property_list_v<PropertyListT>,
338  bool> = true>
339  simd_obj_impl(const Ty *ptr, PropertyListT = {}) noexcept {
340  __esimd_dbg_print(simd_obj_impl(const Ty *ptr, PropertyListT));
341  using NewPropertyListT =
342  detail::add_alignment_property_t<PropertyListT, sizeof(Ty)>;
343  copy_from(ptr, NewPropertyListT{});
344  }
345 
356  template <
357  typename AccessorT, typename Flags,
358  typename std::enable_if_t<
359  detail::is_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
360  is_simd_flag_type_v<Flags>,
361  bool> = true>
362  simd_obj_impl(AccessorT acc,
363 #ifdef __ESIMD_FORCE_STATELESS_MEM
364  uint64_t offset,
365 #else
366  uint32_t offset,
367 #endif
368  Flags) noexcept {
369  __esimd_dbg_print(simd_obj_impl(AccessorT acc,
370 #ifdef __ESIMD_FORCE_STATELESS_MEM
371  uint64_t offset,
372 #else
373  uint32_t offset,
374 #endif
375  Flags));
376  copy_from(acc, offset, Flags{});
377  }
378 
390  template <
391  typename AccessorT,
392  typename PropertyListT = oneapi::experimental::empty_properties_t,
393  typename std::enable_if_t<
394  detail::is_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
395  ext::oneapi::experimental::is_property_list_v<PropertyListT>,
396  bool> = true>
397  simd_obj_impl(AccessorT acc,
398 #ifdef __ESIMD_FORCE_STATELESS_MEM
399  uint64_t offset,
400 #else
401  uint32_t offset,
402 #endif
403  PropertyListT = {}) noexcept {
404  __esimd_dbg_print(simd_obj_impl(AccessorT acc,
405 #ifdef __ESIMD_FORCE_STATELESS_MEM
406  uint64_t offset,
407 #else
408  uint32_t offset,
409 #endif
410  PropertyListT));
411  using NewPropertyListT =
412  detail::add_alignment_property_t<PropertyListT, sizeof(Ty)>;
413  copy_from(acc, offset, NewPropertyListT{});
414  }
415 
417  Derived &operator=(const simd_obj_impl &other) noexcept {
418  set(other.data());
419  return cast_this_to_derived();
420  }
421 
424  template <typename T = simd_obj_impl,
425  typename = std::enable_if_t<T::length == 1>>
426  operator Ty() const {
427  __esimd_dbg_print(operator Ty());
428  return bitcast_to_wrapper_type<Ty>(data()[0]);
429  }
430 
434 #ifndef __SYCL_DEVICE_ONLY__
435  return M_data;
436 #else
437  return __esimd_vload<RawTy, N>(&M_data);
438 #endif
439  }
440 
444  raw_vector_type &data_ref() { return M_data; }
445 
448  Derived read() const { return Derived{data()}; }
449 
452  Derived &write(const Derived &Val) {
453  set(Val.data());
454  return cast_this_to_derived();
455  }
456 
463  void merge(const Derived &Val, const simd_mask_type<N> &Mask) {
464  check_wrregion_params<N, N, 0 /*VS*/, N, 1>();
465  set(__esimd_wrregion<RawTy, N, N, 0 /*VS*/, N, 1, N>(data(), Val.data(), 0,
466  Mask.data()));
467  }
468 
476  void merge(const Derived &Val1, Derived Val2, const simd_mask_type<N> &Mask) {
477  Val2.merge(Val1, Mask);
478  set(Val2.data());
479  }
480 
487  template <typename EltTy> auto bit_cast_view() &[[clang::lifetimebound]] {
488  using TopRegionTy = compute_format_type_t<Derived, EltTy>;
489  using RetTy = simd_view<Derived, TopRegionTy>;
490  return RetTy{cast_this_to_derived(), TopRegionTy{0}};
491  }
492 
503  template <typename EltTy, int Height, int Width>
504  auto bit_cast_view() &[[clang::lifetimebound]] {
505  using TopRegionTy = compute_format_type_2d_t<Derived, EltTy, Height, Width>;
506  using RetTy = simd_view<Derived, TopRegionTy>;
507  return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}};
508  }
509 
517  template <int Size, int Stride>
519  select(uint16_t Offset = 0) &[[clang::lifetimebound]] {
520  static_assert(Size > 1 || Stride == 1,
521  "Stride must be 1 in single-element region");
522  region1d_t<Ty, Size, Stride> Reg(Offset);
523  return {cast_this_to_derived(), std::move(Reg)};
524  }
525 
533  template <int Size, int Stride>
534  resize_a_simd_type_t<Derived, Size> select(uint16_t Offset = 0) && {
535  static_assert(Size > 1 || Stride == 1,
536  "Stride must be 1 in single-element region");
537  Derived &&Val = std::move(cast_this_to_derived());
538  check_rdregion_params<N, Size, /*VS*/ 0, Size, Stride>();
539  return __esimd_rdregion<RawTy, N, Size, /*VS*/ 0, Size, Stride>(Val.data(),
540  Offset);
541  }
542 
546  Ty operator[](int i) const { return bitcast_to_wrapper_type<Ty>(data()[i]); }
547 
552  [[clang::lifetimebound]] {
553  return select<1, 1>(i);
554  }
555 
561  template <int Size>
562  resize_a_simd_type_t<Derived, Size>
563  iselect(const simd<uint16_t, Size> &Indices) {
564  vector_type_t<uint16_t, Size> Offsets = Indices.data() * sizeof(RawTy);
565  return __esimd_rdindirect<RawTy, N, Size>(data(), Offsets);
566  }
567 
571  void iupdate(ushort Index, Ty V) {
572  auto Val = data();
573  Val[Index] = bitcast_to_raw_type(V);
574  set(Val);
575  }
576 
582  template <int Size>
583  void iupdate(const simd<uint16_t, Size> &Indices,
584  const resize_a_simd_type_t<Derived, Size> &Val,
585  const simd_mask_type<Size> &Mask) {
586  vector_type_t<uint16_t, Size> Offsets = Indices.data() * sizeof(RawTy);
587  set(__esimd_wrindirect<RawTy, N, Size>(data(), Val.data(), Offsets,
588  Mask.data()));
589  }
590 
594  template <int Rep> resize_a_simd_type_t<Derived, Rep * N> replicate() const {
595  return replicate_w<Rep, N>(0);
596  }
597 
605  template <int Rep, int W>
606  resize_a_simd_type_t<Derived, Rep * W> replicate_w(uint16_t Offset) const {
607  return replicate_vs_w_hs<Rep, 0, W, 1>(Offset);
608  }
609 
619  template <int Rep, int VS, int W>
620  resize_a_simd_type_t<Derived, Rep * W> replicate_vs_w(uint16_t Offset) const {
621  return replicate_vs_w_hs<Rep, VS, W, 1>(Offset);
622  }
623 
672  template <int Rep, int VS, int W, int HS>
673  resize_a_simd_type_t<Derived, Rep * W>
674  replicate_vs_w_hs(uint16_t Offset) const {
675  check_rdregion_params<N, Rep * W, VS, W, HS>();
676  return __esimd_rdregion<RawTy, N, Rep * W, VS, W, HS, N>(
677  data(), Offset * sizeof(RawTy));
678  }
679 
683  template <typename T1 = Ty,
684  typename = std::enable_if_t<std::is_integral_v<T1>>>
685  uint16_t any() const {
686  return __esimd_any<Ty, N>(data());
687  }
688 
692  template <typename T1 = Ty,
693  typename = std::enable_if_t<std::is_integral_v<T1>>>
694  uint16_t all() const {
695  return __esimd_all<Ty, N>(data());
696  }
697 
698 protected:
701  template <typename RTy, class ElemTy = __raw_t<typename RTy::element_type>>
702  ESIMD_INLINE void writeRegion(RTy Region,
703  const vector_type_t<ElemTy, RTy::length> &Val) {
704 
705  if constexpr (N * sizeof(RawTy) == RTy::length * sizeof(ElemTy))
706  // update the entire vector
707  set(bitcast<RawTy, ElemTy, RTy::length>(Val));
708  else {
709  static_assert(!RTy::Is_2D);
710  // If element type differs, do bitcast conversion first.
711  auto Base = bitcast<ElemTy, RawTy, N>(data());
712  constexpr int BN = (N * sizeof(RawTy)) / sizeof(ElemTy);
713  // Access the region information.
714  constexpr int M = RTy::Size_x;
715  constexpr int Stride = RTy::Stride_x;
716  uint16_t Offset = Region.M_offset_x * sizeof(ElemTy);
717  check_wrregion_params<BN, M, /*VS*/ 0, M, Stride>();
718  // Merge and update.
719  auto Merged = __esimd_wrregion<ElemTy, BN, M,
720  /*VS*/ 0, M, Stride>(Base, Val, Offset);
721  // Convert back to the original element type, if needed.
722  set(bitcast<RawTy, ElemTy, BN>(Merged));
723  }
724  }
725 
728  template <typename TR, typename UR,
729  class ElemTy = __raw_t<typename TR::element_type>>
730  ESIMD_INLINE void writeRegion(std::pair<TR, UR> Region,
731  const vector_type_t<ElemTy, TR::length> &Val) {
732  // parent-region type
733  using PaTy = typename shape_type<UR>::type;
734  using BT = __raw_t<typename PaTy::element_type>;
735  constexpr int BN = PaTy::length;
736 
737  if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) {
738  writeRegion(Region.second, bitcast<BT, ElemTy, TR::length>(Val));
739  } else {
740  // Recursively read the base
741  auto Base = readRegion<RawTy, N>(data(), Region.second);
742  // If element type differs, do bitcast conversion first.
743  auto Base1 = bitcast<ElemTy, BT, BN>(Base);
744  constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy);
745 
746  if constexpr (!TR::Is_2D) {
747  // Access the region information.
748  constexpr int M = TR::Size_x;
749  constexpr int Stride = TR::Stride_x;
750  uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy);
751 
752  check_wrregion_params<BN1, M, /*VS*/ 0, M, Stride>();
753  // Merge and update.
754  Base1 = __esimd_wrregion<ElemTy, BN1, M,
755  /*VS*/ 0, M, Stride>(Base1, Val, Offset);
756  } else {
757  static_assert(std::is_same<ElemTy, BT>::value);
758  // Read columns with non-trivial horizontal stride.
759  constexpr int M = TR::length;
760  constexpr int VS = PaTy::Size_x * TR::Stride_y;
761  constexpr int W = TR::Size_x;
762  constexpr int HS = TR::Stride_x;
763  constexpr int ParentWidth = PaTy::Size_x;
764 
765  // Compute the byte offset for the starting element.
766  uint16_t Offset = static_cast<uint16_t>(
767  (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) *
768  sizeof(ElemTy));
769 
770  check_wrregion_params<BN1, M, VS, W, HS>();
771  // Merge and update.
772  Base1 = __esimd_wrregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(
773  Base1, Val, Offset);
774  }
775  // Convert back to the original element type, if needed.
776  auto Merged1 = bitcast<BT, ElemTy, BN1>(Base1);
777  // recursively write it back to the base
778  writeRegion(Region.second, Merged1);
779  }
780  }
781 
782 public:
793  template <typename Flags = element_aligned_tag, int ChunkSize = 32>
794  ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
795  copy_from(const Ty *addr, Flags) SYCL_ESIMD_FUNCTION;
796 
809  template <int ChunkSize = 32,
810  typename PropertyListT = oneapi::experimental::empty_properties_t>
811  ESIMD_INLINE std::enable_if_t<
812  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
813  copy_from(const Ty *addr, PropertyListT = {}) SYCL_ESIMD_FUNCTION;
814 
826  template <typename AccessorT, typename Flags = element_aligned_tag,
827  int ChunkSize = 32>
828  ESIMD_INLINE std::enable_if_t<detail::is_device_accessor_with_v<
829  AccessorT, accessor_mode_cap::can_read> &&
830  is_simd_flag_type_v<Flags>>
831  copy_from(AccessorT acc,
832 #ifdef __ESIMD_FORCE_STATELESS_MEM
833  uint64_t offset,
834 #else
835  uint32_t offset,
836 #endif
837  Flags) SYCL_ESIMD_FUNCTION;
838 
852  template <typename AccessorT, int ChunkSize = 32,
853  typename PropertyListT = oneapi::experimental::empty_properties_t>
854  ESIMD_INLINE std::enable_if_t<
855  detail::is_device_accessor_with_v<AccessorT,
856  accessor_mode_cap::can_read> &&
857  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
858  copy_from(AccessorT acc,
859 #ifdef __ESIMD_FORCE_STATELESS_MEM
860  uint64_t offset,
861 #else
862  uint32_t offset,
863 #endif
864  PropertyListT = {}) SYCL_ESIMD_FUNCTION;
865 
877  template <typename AccessorT, typename Flags = element_aligned_tag,
878  int ChunkSize = 32>
879  ESIMD_INLINE std::enable_if_t<detail::is_local_accessor_with_v<
880  AccessorT, accessor_mode_cap::can_read> &&
881  is_simd_flag_type_v<Flags>,
882  void>
883  copy_from(AccessorT acc, uint32_t offset, Flags) SYCL_ESIMD_FUNCTION;
884 
898  template <typename AccessorT, int ChunkSize = 32,
899  typename PropertyListT = oneapi::experimental::empty_properties_t>
900  ESIMD_INLINE std::enable_if_t<
901  detail::is_local_accessor_with_v<AccessorT,
902  accessor_mode_cap::can_read> &&
903  ext::oneapi::experimental::is_property_list_v<PropertyListT>,
904  void>
905  copy_from(AccessorT acc, uint32_t offset,
906  PropertyListT = {}) SYCL_ESIMD_FUNCTION;
907 
914  template <typename Flags, int ChunkSize = 32>
915  ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
916  copy_to(Ty *addr, Flags) const SYCL_ESIMD_FUNCTION;
917 
925  template <int ChunkSize = 32,
926  typename PropertyListT = oneapi::experimental::empty_properties_t>
927  ESIMD_INLINE std::enable_if_t<
928  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
929  copy_to(Ty *addr, PropertyListT = {}) const SYCL_ESIMD_FUNCTION;
930 
940  template <typename AccessorT, typename Flags, int ChunkSize = 32>
941  ESIMD_INLINE std::enable_if_t<detail::is_device_accessor_with_v<
942  AccessorT, accessor_mode_cap::can_write> &&
943  is_simd_flag_type_v<Flags>>
944  copy_to(AccessorT acc,
945 #ifdef __ESIMD_FORCE_STATELESS_MEM
946  uint64_t offset,
947 #else
948  uint32_t offset,
949 #endif
950  Flags) const SYCL_ESIMD_FUNCTION;
951 
962  template <typename AccessorT, int ChunkSize = 32,
963  typename PropertyListT = oneapi::experimental::empty_properties_t>
964  ESIMD_INLINE std::enable_if_t<
965  detail::is_device_accessor_with_v<AccessorT,
966  accessor_mode_cap::can_write> &&
967  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
968  copy_to(AccessorT acc,
969 #ifdef __ESIMD_FORCE_STATELESS_MEM
970  uint64_t offset,
971 #else
972  uint32_t offset,
973 #endif
974  PropertyListT = {}) const SYCL_ESIMD_FUNCTION;
975 
985  template <typename AccessorT, typename Flags, int ChunkSize = 32>
986  ESIMD_INLINE std::enable_if_t<detail::is_local_accessor_with_v<
987  AccessorT, accessor_mode_cap::can_write> &&
988  is_simd_flag_type_v<Flags>,
989  void>
990  copy_to(AccessorT acc, uint32_t offset, Flags) const SYCL_ESIMD_FUNCTION;
991 
1002  template <typename AccessorT, int ChunkSize = 32,
1003  typename PropertyListT = oneapi::experimental::empty_properties_t>
1004  ESIMD_INLINE std::enable_if_t<
1005  detail::is_local_accessor_with_v<AccessorT,
1006  accessor_mode_cap::can_write> &&
1007  ext::oneapi::experimental::is_property_list_v<PropertyListT>,
1008  void>
1009  copy_to(AccessorT acc, uint32_t offset,
1010  PropertyListT = {}) const SYCL_ESIMD_FUNCTION;
1011 
1012  // Unary operations.
1013 
1017  template <class T1 = Ty, class = std::enable_if_t<std::is_integral_v<T1>>>
1018  Derived operator~() const {
1019  return Derived{
1020  detail::vector_unary_op<detail::UnaryOp::bit_not, T1, N>(data())};
1021  }
1022 
1029  template <class T1 = Ty, class = std::enable_if_t<std::is_integral_v<T1>>>
1030  simd_mask_type<N> operator!() const {
1031  return *this == 0;
1032  }
1033 
1034 #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \
1035  \
1036  \
1037  \
1038  \
1039  \
1040  template <class T1, class SimdT, \
1041  class = std::enable_if_t<(is_simd_type_v<Derived> == \
1042  is_simd_type_v<SimdT>)&&COND>> \
1043  Derived &operator OPASSIGN( \
1044  const __ESIMD_DNS::simd_obj_impl<T1, N, SimdT> &RHS) { \
1045  auto Res = *this BINOP RHS; \
1046  using ResT = decltype(Res); \
1047  set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
1048  length>(Res.data())); \
1049  return cast_this_to_derived(); \
1050  } \
1051  \
1052  \
1053  \
1054  \
1055  \
1056  \
1057  template <class SimdT1, class RegionT1, \
1058  class T1 = typename RegionT1::element_type, \
1059  class = std::enable_if_t< \
1060  (is_simd_type_v<Derived> == \
1061  is_simd_type_v<SimdT1>)&&(RegionT1::length == length) && \
1062  COND>> \
1063  Derived &operator OPASSIGN( \
1064  const __ESIMD_NS::simd_view<SimdT1, RegionT1> &RHS) { \
1065  auto Res = *this BINOP RHS.read(); \
1066  using ResT = decltype(Res); \
1067  set(__ESIMD_DNS::convert_vector<element_type, typename ResT::element_type, \
1068  length>(Res.data())); \
1069  return cast_this_to_derived(); \
1070  } \
1071  \
1072  \
1073  \
1074  \
1075  template <class T1, class = std::enable_if_t<COND>> \
1076  Derived &operator OPASSIGN(T1 RHS) { \
1077  if constexpr (is_simd_type_v<Derived>) { \
1078  using RHSVecT = __ESIMD_DNS::construct_a_simd_type_t<Derived, T1, N>; \
1079  return *this OPASSIGN RHSVecT(RHS); \
1080  } else { \
1081  return *this OPASSIGN Derived((RawTy)RHS); \
1082  } \
1083  }
1084 
1085 // Bitwise operations are defined for simd objects and masks, and both operands
1086 // must be integral
1087 #define __ESIMD_BITWISE_OP_FILTER \
1088  std::is_integral_v<element_type> &&std::is_integral_v<T1>
1101 #undef __ESIMD_BITWISE_OP_FILTER
1102 
1103 // Bit shift operations are defined only for simd objects (not for masks), and
1104 // both operands must be integral
1105 #define __ESIMD_SHIFT_OP_FILTER \
1106  std::is_integral_v<element_type> &&std::is_integral_v<T1> \
1107  &&__ESIMD_DNS::is_simd_type_v<Derived>
1108 
1116 #undef __ESIMD_SHIFT_OP_FILTER
1117 
1118 // Arithmetic operations are defined only for simd objects, and the second
1119 // operand's element type must be vectorizable. This requirement for 'this'
1120 // is fulfilled, because otherwise 'this' couldn't have been constructed.
1121 #define __ESIMD_ARITH_OP_FILTER \
1122  __ESIMD_DNS::is_simd_type_v<Derived> &&__ESIMD_DNS::is_vectorizable_v<T1>
1123 
1132 #undef __ESIMD_ARITH_OP_FILTER
1133 #undef __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN
1134 
1135  // Getter for the test proxy member, if enabled
1136  __ESIMD_DECLARE_TEST_PROXY_ACCESS
1137 
1138 private:
1139  // The underlying data for this vector.
1140  raw_vector_type M_data;
1141 
1142  template <int ChunkSize, typename Flags, typename AccessorT, typename TOffset>
1143  ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
1144  copy_to_impl(AccessorT acc, TOffset offset) const SYCL_ESIMD_FUNCTION;
1145  template <int ChunkSize, typename PropertyListT, typename AccessorT,
1146  typename TOffset>
1147  ESIMD_INLINE std::enable_if_t<
1148  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1149  copy_to_impl(AccessorT acc, TOffset offset,
1150  PropertyListT = {}) const SYCL_ESIMD_FUNCTION;
1151  template <int ChunkSize, typename Flags, typename AccessorT, typename TOffset>
1152  ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
1153  copy_from_impl(AccessorT acc, TOffset offset) SYCL_ESIMD_FUNCTION;
1154  template <int ChunkSize, typename PropertyListT, typename AccessorT,
1155  typename TOffset>
1156  ESIMD_INLINE std::enable_if_t<
1157  ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1158  copy_from_impl(AccessorT acc, TOffset offset,
1159  PropertyListT = {}) SYCL_ESIMD_FUNCTION;
1160 
1161 protected:
1162  // The test proxy if enabled
1163  __ESIMD_DECLARE_TEST_PROXY
1164 
1165  void set(const raw_vector_type &Val) {
1166 #ifndef __SYCL_DEVICE_ONLY__
1167  M_data = Val;
1168 #else
1169  __esimd_vstore<RawTy, N>(&M_data, Val);
1170 #endif
1171  }
1172 };
1174 
1175 } // namespace detail
1176 
1177 template <>
1178 struct is_simd_flag_type<detail::dqword_element_aligned_tag> : std::true_type {
1179 };
1180 } // namespace ext::intel::esimd
1181 } // namespace _V1
1182 } // namespace sycl
This class is a simd_obj_impl specialization representing a simd mask, which is basically a simd_obj_...
This is a base class for all ESIMD simd classes with real storage (simd, simd_mask_impl).
ESIMD_INLINE void writeRegion(std::pair< TR, UR > Region, const vector_type_t< ElemTy, TR::length > &Val)
Write a simd_obj_impl-vector into a nested region of a simd_obj_impl object.
simd_obj_impl(T1 Val) noexcept
Broadcast constructor.
resize_a_simd_type_t< Derived, Size > iselect(const simd< uint16_t, Size > &Indices)
Indirect select - select and extract multiple elements with given variable indices.
simd_obj_impl(const Ty *ptr, Flags) noexcept
Pointer-based load constructor.
resize_a_simd_type_t< Derived, Rep *N > replicate() const
Replicates contents of this vector a number of times into a new vector.
ESIMD_INLINE std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > copy_from(AccessorT acc, uint32_t offset, PropertyListT={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
Derived & operator=(const simd_obj_impl &other) noexcept
Copy assignment operator.
get_vector_element_type< Derived > element_type
Element type of the derived (user) class.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_read > &&is_simd_flag_type_v< Flags >, void > copy_from(AccessorT acc, uint32_t offset, Flags) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
Derived & write(const Derived &Val)
Replaces the underlying data with the one taken from another object.
Ty operator[](int i) const
Get value of this vector's element.
simd_obj_impl(AccessorT acc, uint32_t offset, PropertyListT={}) noexcept
Accessor-based load constructor.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, void > copy_to(AccessorT acc, uint32_t offset, PropertyListT={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
ESIMD_INLINE std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > copy_from(const Ty *addr, PropertyListT={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
static constexpr int length
The number of elements in this object.
simd_obj_impl(Ty Base, Ty Step) noexcept
Arithmetic progression constructor.
auto bit_cast_view() &
Create a 2-dimensional view of this object.
uint16_t all() const
See if all elements are non-zero.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_write > &&is_simd_flag_type_v< Flags >, void > copy_to(AccessorT acc, uint32_t offset, Flags) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
simd_obj_impl(const simd_obj_impl< Ty1, N, Derived1, SFINAE > &other)
Implicit conversion constructor from another simd_obj_impl object.
void iupdate(const simd< uint16_t, Size > &Indices, const resize_a_simd_type_t< Derived, Size > &Val, const simd_mask_type< Size > &Mask)
Indirect update - update multiple elements with given variable indices.
ESIMD_INLINE std::enable_if_t< detail::is_local_accessor_with_v< AccessorT, accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT >, void > copy_from(AccessorT acc, uint32_t offset, PropertyListT={}) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
simd_obj_impl(const simd_obj_impl &other)
Copy constructor.
void merge(const Derived &Val, const simd_mask_type< N > &Mask)
"Merges" this object's value with another object: replaces part of the underlying data with the one t...
simd_mask_type< N > operator!() const
Unary logical negation operator, available in all subclasses, but only for integral element types (si...
simd_obj_impl(const Ty *ptr, PropertyListT={}) noexcept
Pointer-based load constructor.
void iupdate(ushort Index, Ty V)
Update single element with variable index.
simd_obj_impl(AccessorT acc, uint32_t offset, Flags) noexcept
Accessor-based load constructor.
RawTy raw_element_type
The element type of the raw storage vector.
ESIMD_INLINE void writeRegion(RTy Region, const vector_type_t< ElemTy, RTy::length > &Val)
Write a simd_obj_impl-vector into a basic region of a simd_obj_impl object.
ESIMD_INLINE std::enable_if_t< is_simd_flag_type_v< Flags > > copy_to(Ty *addr, Flags) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
simd_obj_impl(const Ty(&&Arr)[N1]) noexcept
Rvalue array-based constructor.
ESIMD_INLINE std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, accessor_mode_cap::can_write > &&is_simd_flag_type_v< Flags > > copy_to(AccessorT acc, uint32_t offset, Flags) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
void merge(const Derived &Val1, Derived Val2, const simd_mask_type< N > &Mask)
Merges given two objects with a mask and writes resulting data into this object.
ESIMD_INLINE std::enable_if_t< is_simd_flag_type_v< Flags > > copy_from(const Ty *addr, Flags) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
Derived operator~() const
Per-element bitwise inversion, available in all subclasses, but only for integral element types (simd...
ESIMD_INLINE std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, accessor_mode_cap::can_write > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > copy_to(AccessorT acc, uint32_t offset, PropertyListT={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
ESIMD_INLINE std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > copy_to(Ty *addr, PropertyListT={}) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
auto bit_cast_view() &
Create a 1-dimensional view of this object.
resize_a_simd_type_t< Derived, Rep *W > replicate_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with VS=0 and HS=1 to replicate a single "dense" (w/o gaps between elem...
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w_hs(uint16_t Offset) const
This function "replicates" a portion of this object's elements into a new object.
__ESIMD_DECLARE_TEST_PROXY void set(const raw_vector_type &Val)
simd_view< Derived, region1d_t< Ty, Size, Stride > > select(uint16_t Offset=0) &
Select elements of this object into a subregion and create a 1D view for for it.
simd_obj_impl(const raw_vector_type &Val)
Implicit conversion constructor from a raw vector object.
resize_a_simd_type_t< Derived, Size > select(uint16_t Offset=0) &&
Select and extract a subregion of this object's elements and return it as a new vector object.
simd_view< Derived, region1d_scalar_t< Ty > > operator[](int i)
Return writable view of a single element.
ESIMD_INLINE std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, accessor_mode_cap::can_read > &&is_simd_flag_type_v< Flags > > copy_from(AccessorT acc, uint32_t offset, Flags) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
vector_type_t< RawTy, N > raw_vector_type
The underlying raw storage vector data type.
uint16_t any() const
See if any element is non-zero.
resize_a_simd_type_t< Derived, Rep *W > replicate_vs_w(uint16_t Offset) const
Shortcut to replicate_vs_w_hs with HS=1 to replicate dense blocks.
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
The main simd vector class.
Definition: simd.hpp:53
#define __esimd_dbg_print(a)
Definition: types.hpp:24
constexpr vector_aligned_tag vector_aligned
static constexpr bool is_simd_flag_type_v
Checks if given type is a simd load/store flag.
constexpr overaligned_tag< N > overaligned
constexpr element_aligned_tag element_aligned
unsigned short ushort
Definition: common.hpp:42
typename add_alignment_property< PropertyListT, Alignment >::type add_alignment_property_t
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
constexpr alignment_key::value_t< K > alignment
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:234
Definition: access.hpp:18
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:436
#define __ESIMD_SHIFT_OP_FILTER
#define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND)
#define __ESIMD_BITWISE_OP_FILTER
#define __ESIMD_ARITH_OP_FILTER
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Checks if type is a simd load/store flag.