DPC++ Runtime
Runtime libraries for oneAPI DPC++
types.hpp
Go to the documentation of this file.
1 //==---------------- types.hpp --- SYCL types ------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 // Check if Clang's ext_vector_type attribute is available. Host compiler
12 // may not be Clang, and Clang may not be built with the extension.
13 #ifdef __clang__
14 #ifndef __has_extension
15 #define __has_extension(x) 0
16 #endif
17 #ifdef __HAS_EXT_VECTOR_TYPE__
18 #error "Undefine __HAS_EXT_VECTOR_TYPE__ macro"
19 #endif
20 #if __has_extension(attribute_ext_vector_type)
21 #define __HAS_EXT_VECTOR_TYPE__
22 #endif
23 #endif // __clang__
24 
25 #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
26 #error "SYCL device compiler is built without ext_vector_type support"
27 #endif
28 
29 #if defined(__SYCL_DEVICE_ONLY__)
30 #define __SYCL_USE_EXT_VECTOR_TYPE__
31 #endif
32 
33 #include <sycl/access/access.hpp> // for decorated, address_space
34 #include <sycl/aliases.hpp> // for half, cl_char, cl_int
35 #include <sycl/detail/common.hpp> // for ArrayCreator, RepeatV...
36 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
37 #include <sycl/detail/generic_type_lists.hpp> // for vector_basic_list
38 #include <sycl/detail/generic_type_traits.hpp> // for is_sigeninteger, is_s...
39 #include <sycl/detail/memcpy.hpp> // for memcpy
40 #include <sycl/detail/type_list.hpp> // for is_contained
41 #include <sycl/detail/type_traits.hpp> // for is_floating_point
42 #include <sycl/detail/vector_convert.hpp> // for convertImpl
43 #include <sycl/detail/vector_traits.hpp> // for vector_alignment
44 #include <sycl/exception.hpp> // for make_error_code, errc
45 #include <sycl/half_type.hpp> // for StorageT, half, Vec16...
46 #include <sycl/marray.hpp> // for __SYCL_BINOP, __SYCL_...
47 #include <sycl/multi_ptr.hpp> // for multi_ptr
48 
49 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
50 
51 #include <array> // for array
52 #include <assert.h> // for assert
53 #include <cstddef> // for size_t, NULL, byte
54 #include <cstdint> // for uint8_t, int16_t, int...
55 #include <functional> // for divides, multiplies
56 #include <iterator> // for pair
57 #include <optional> // for optional
58 #include <ostream> // for operator<<, basic_ost...
59 #include <tuple> // for tuple
60 #include <type_traits> // for enable_if_t, is_same
61 #include <utility> // for index_sequence, make_...
62 #include <variant> // for tuple, variant
63 
64 // 4.10.1: Scalar data types
65 // 4.10.2: SYCL vector types
66 
67 namespace sycl {
68 inline namespace _V1 {
69 
70 struct elem {
71  static constexpr int x = 0;
72  static constexpr int y = 1;
73  static constexpr int z = 2;
74  static constexpr int w = 3;
75  static constexpr int r = 0;
76  static constexpr int g = 1;
77  static constexpr int b = 2;
78  static constexpr int a = 3;
79  static constexpr int s0 = 0;
80  static constexpr int s1 = 1;
81  static constexpr int s2 = 2;
82  static constexpr int s3 = 3;
83  static constexpr int s4 = 4;
84  static constexpr int s5 = 5;
85  static constexpr int s6 = 6;
86  static constexpr int s7 = 7;
87  static constexpr int s8 = 8;
88  static constexpr int s9 = 9;
89  static constexpr int sA = 10;
90  static constexpr int sB = 11;
91  static constexpr int sC = 12;
92  static constexpr int sD = 13;
93  static constexpr int sE = 14;
94  static constexpr int sF = 15;
95 };
96 
97 namespace detail {
98 // select_apply_cl_t selects from T8/T16/T32/T64 basing on
99 // sizeof(_IN). expected to handle scalar types in _IN.
100 template <typename _IN, typename T8, typename T16, typename T32, typename T64>
101 using select_apply_cl_t = std::conditional_t<
102  sizeof(_IN) == 1, T8,
103  std::conditional_t<sizeof(_IN) == 2, T16,
104  std::conditional_t<sizeof(_IN) == 4, T32, T64>>>;
105 
106 template <typename T> struct vec_helper {
107  using RetType = T;
108  static constexpr RetType get(T value) { return value; }
109  static constexpr RetType set(T value) { return value; }
110 };
111 template <> struct vec_helper<bool> {
112  using RetType = select_apply_cl_t<bool, std::int8_t, std::int16_t,
113  std::int32_t, std::int64_t>;
114  static constexpr RetType get(bool value) { return value; }
115  static constexpr RetType set(bool value) { return value; }
116 };
117 
118 template <> struct vec_helper<sycl::ext::oneapi::bfloat16> {
121  static constexpr RetType get(BFloat16StorageT value) {
122 #if defined(__SYCL_BITCAST_IS_CONSTEXPR)
123  return sycl::bit_cast<RetType>(value);
124 #else
125  // awkward workaround. sycl::bit_cast isn't constexpr in older GCC
126  // C++20 will give us both std::bit_cast and constexpr reinterpet for void*
127  // but neither available yet.
128  union {
131  } result = {};
132  result.storage = value;
133  return result.bf16;
134 #endif
135  }
136 
137  static constexpr RetType get(RetType value) { return value; }
138 
139  static constexpr BFloat16StorageT set(RetType value) {
140 #if defined(__SYCL_BITCAST_IS_CONSTEXPR)
141  return sycl::bit_cast<BFloat16StorageT>(value);
142 #else
143  union {
146  } result = {};
147  result.bf16 = value;
148  return result.storage;
149 #endif
150  }
151 };
152 
153 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
154 template <> struct vec_helper<std::byte> {
155  using RetType = std::uint8_t;
156  static constexpr RetType get(std::byte value) { return (RetType)value; }
157  static constexpr RetType set(std::byte value) { return (RetType)value; }
158  static constexpr std::byte get(std::uint8_t value) {
159  return (std::byte)value;
160  }
161  static constexpr std::byte set(std::uint8_t value) {
162  return (std::byte)value;
163  }
164 };
165 #endif
166 
167 template <typename VecT, typename OperationLeftT, typename OperationRightT,
168  template <typename> class OperationCurrentT, int... Indexes>
169 class SwizzleOp;
170 
171 template <typename T, int N, typename V = void> struct VecStorage;
172 
173 // Element type for relational operator return value.
174 template <typename DataT>
175 using rel_t = typename std::conditional_t<
176  sizeof(DataT) == sizeof(opencl::cl_char), opencl::cl_char,
177  typename std::conditional_t<
178  sizeof(DataT) == sizeof(opencl::cl_short), opencl::cl_short,
179  typename std::conditional_t<
180  sizeof(DataT) == sizeof(opencl::cl_int), opencl::cl_int,
181  typename std::conditional_t<sizeof(DataT) ==
182  sizeof(opencl::cl_long),
183  opencl::cl_long, bool>>>>;
184 
185 // Special type indicating that SwizzleOp should just read value from vector -
186 // not trying to perform any operations. Should not be called.
187 template <typename T> class GetOp {
188 public:
189  using DataT = T;
190  DataT getValue(size_t) const { return (DataT)0; }
191  DataT operator()(DataT, DataT) { return (DataT)0; }
192 };
193 
194 // Special type for working SwizzleOp with scalars, stores a scalar and gives
195 // the scalar at any index. Provides interface is compatible with SwizzleOp
196 // operations
197 template <typename T> class GetScalarOp {
198 public:
199  using DataT = T;
200  GetScalarOp(DataT Data) : m_Data(Data) {}
201  DataT getValue(size_t) const { return m_Data; }
202 
203 private:
204  DataT m_Data;
205 };
206 
207 template <typename T> struct EqualTo {
208  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
209  return (Lhs == Rhs) ? -1 : 0;
210  }
211 };
212 
213 template <typename T> struct NotEqualTo {
214  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
215  return (Lhs != Rhs) ? -1 : 0;
216  }
217 };
218 
219 template <typename T> struct GreaterEqualTo {
220  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
221  return (Lhs >= Rhs) ? -1 : 0;
222  }
223 };
224 
225 template <typename T> struct LessEqualTo {
226  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
227  return (Lhs <= Rhs) ? -1 : 0;
228  }
229 };
230 
231 template <typename T> struct GreaterThan {
232  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
233  return (Lhs > Rhs) ? -1 : 0;
234  }
235 };
236 
237 template <typename T> struct LessThan {
238  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
239  return (Lhs < Rhs) ? -1 : 0;
240  }
241 };
242 
243 template <typename T> struct LogicalAnd {
244  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
245  return (Lhs && Rhs) ? -1 : 0;
246  }
247 };
248 
249 template <typename T> struct LogicalOr {
250  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
251  return (Lhs || Rhs) ? -1 : 0;
252  }
253 };
254 
255 template <typename T> struct RShift {
256  constexpr T operator()(const T &Lhs, const T &Rhs) const {
257  return Lhs >> Rhs;
258  }
259 };
260 
261 template <typename T> struct LShift {
262  constexpr T operator()(const T &Lhs, const T &Rhs) const {
263  return Lhs << Rhs;
264  }
265 };
266 
267 // Forward declarations
268 template <typename TransformedArgType, int Dims, typename KernelType>
269 class RoundedRangeKernel;
270 template <typename TransformedArgType, int Dims, typename KernelType>
271 class RoundedRangeKernelWithKH;
272 
273 } // namespace detail
274 
275 template <typename T> using vec_data = detail::vec_helper<T>;
276 
277 template <typename T>
279 
284 template <typename Type, int NumElements> class vec {
285  using DataT = Type;
286 
287  // This represent type of underlying value. There should be only one field
288  // in the class, so vec<float, 16> should be equal to float16 in memory.
289  using DataType = typename detail::VecStorage<DataT, NumElements>::DataType;
290 
291  static constexpr bool IsHostHalf =
292  std::is_same_v<DataT, sycl::detail::half_impl::half> &&
293  std::is_same_v<sycl::detail::half_impl::StorageT,
295 
296  static constexpr bool IsBfloat16 =
297  std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;
298 
299  static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements;
300  static constexpr size_t Sz = sizeof(DataT) * AdjustedNum;
301  static constexpr bool IsSizeGreaterThanMaxAlign =
303 
304  // TODO: There is no support for vector half type on host yet.
305  // Also, when Sz is greater than alignment, we use std::array instead of
306  // vector extension. This is for MSVC compatibility, which has a max alignment
307  // of 64 for direct params. If we drop MSVC, we can have alignment the same as
308  // size and use vector extensions for all sizes.
309  static constexpr bool IsUsingArrayOnDevice =
310  (IsHostHalf || IsBfloat16 || IsSizeGreaterThanMaxAlign);
311 
312 #if defined(__SYCL_DEVICE_ONLY__)
313  static constexpr bool NativeVec = NumElements > 1 && !IsUsingArrayOnDevice;
314  static constexpr bool IsUsingArrayOnHost = false; // not compiling for host.
315 #else
316  static constexpr bool NativeVec = false;
317  static constexpr bool IsUsingArrayOnHost = true; // host always std::array.
318 #endif
319 
320  static constexpr int getNumElements() { return NumElements; }
321 
322  // SizeChecker is needed for vec(const argTN &... args) ctor to validate args.
323  template <int Counter, int MaxValue, class...>
324  struct SizeChecker : std::conditional_t<Counter == MaxValue, std::true_type,
325  std::false_type> {};
326 
327  template <int Counter, int MaxValue, typename DataT_, class... tail>
328  struct SizeChecker<Counter, MaxValue, DataT_, tail...>
329  : std::conditional_t<Counter + 1 <= MaxValue,
330  SizeChecker<Counter + 1, MaxValue, tail...>,
331  std::false_type> {};
332 
333  // Utility trait for creating an std::array from an vector argument.
334  template <typename DataT_, typename T, std::size_t... Is>
335  static constexpr std::array<DataT_, sizeof...(Is)>
336  VecToArray(const vec<T, sizeof...(Is)> &V, std::index_sequence<Is...>) {
337  return {static_cast<DataT_>(V.getValue(Is))...};
338  }
339  template <typename DataT_, typename T, int N, typename T2, typename T3,
340  template <typename> class T4, int... T5, std::size_t... Is>
341  static constexpr std::array<DataT_, sizeof...(Is)>
342  VecToArray(const detail::SwizzleOp<vec<T, N>, T2, T3, T4, T5...> &V,
343  std::index_sequence<Is...>) {
344  return {static_cast<DataT_>(V.getValue(Is))...};
345  }
346  template <typename DataT_, typename T, int N, typename T2, typename T3,
347  template <typename> class T4, int... T5, std::size_t... Is>
348  static constexpr std::array<DataT_, sizeof...(Is)>
349  VecToArray(const detail::SwizzleOp<const vec<T, N>, T2, T3, T4, T5...> &V,
350  std::index_sequence<Is...>) {
351  return {static_cast<DataT_>(V.getValue(Is))...};
352  }
353  template <typename DataT_, typename T, int N>
354  static constexpr std::array<DataT_, N>
355  FlattenVecArgHelper(const vec<T, N> &A) {
356  return VecToArray<DataT_>(A, std::make_index_sequence<N>());
357  }
358  template <typename DataT_, typename T, int N, typename T2, typename T3,
359  template <typename> class T4, int... T5>
360  static constexpr std::array<DataT_, sizeof...(T5)> FlattenVecArgHelper(
361  const detail::SwizzleOp<vec<T, N>, T2, T3, T4, T5...> &A) {
362  return VecToArray<DataT_>(A, std::make_index_sequence<sizeof...(T5)>());
363  }
364  template <typename DataT_, typename T, int N, typename T2, typename T3,
365  template <typename> class T4, int... T5>
366  static constexpr std::array<DataT_, sizeof...(T5)> FlattenVecArgHelper(
367  const detail::SwizzleOp<const vec<T, N>, T2, T3, T4, T5...> &A) {
368  return VecToArray<DataT_>(A, std::make_index_sequence<sizeof...(T5)>());
369  }
370  template <typename DataT_, typename T>
371  static constexpr auto FlattenVecArgHelper(const T &A) {
372  return std::array<DataT_, 1>{vec_data<DataT_>::get(static_cast<DataT_>(A))};
373  }
374  template <typename DataT_, typename T> struct FlattenVecArg {
375  constexpr auto operator()(const T &A) const {
376  return FlattenVecArgHelper<DataT_>(A);
377  }
378  };
379 
380  // Alias for shortening the vec arguments to array converter.
381  template <typename DataT_, typename... ArgTN>
382  using VecArgArrayCreator =
383  detail::ArrayCreator<DataT_, FlattenVecArg, ArgTN...>;
384 
385 #define __SYCL_ALLOW_VECTOR_SIZES(num_elements) \
386  template <int Counter, int MaxValue, typename DataT_, class... tail> \
387  struct SizeChecker<Counter, MaxValue, vec<DataT_, num_elements>, tail...> \
388  : std::conditional_t< \
389  Counter + (num_elements) <= MaxValue, \
390  SizeChecker<Counter + (num_elements), MaxValue, tail...>, \
391  std::false_type> {}; \
392  template <int Counter, int MaxValue, typename DataT_, typename T2, \
393  typename T3, template <typename> class T4, int... T5, \
394  class... tail> \
395  struct SizeChecker< \
396  Counter, MaxValue, \
397  detail::SwizzleOp<vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
398  tail...> \
399  : std::conditional_t< \
400  Counter + sizeof...(T5) <= MaxValue, \
401  SizeChecker<Counter + sizeof...(T5), MaxValue, tail...>, \
402  std::false_type> {}; \
403  template <int Counter, int MaxValue, typename DataT_, typename T2, \
404  typename T3, template <typename> class T4, int... T5, \
405  class... tail> \
406  struct SizeChecker< \
407  Counter, MaxValue, \
408  detail::SwizzleOp<const vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
409  tail...> \
410  : std::conditional_t< \
411  Counter + sizeof...(T5) <= MaxValue, \
412  SizeChecker<Counter + sizeof...(T5), MaxValue, tail...>, \
413  std::false_type> {};
414 
415  __SYCL_ALLOW_VECTOR_SIZES(1)
416  __SYCL_ALLOW_VECTOR_SIZES(2)
417  __SYCL_ALLOW_VECTOR_SIZES(3)
418  __SYCL_ALLOW_VECTOR_SIZES(4)
419  __SYCL_ALLOW_VECTOR_SIZES(8)
420  __SYCL_ALLOW_VECTOR_SIZES(16)
421 #undef __SYCL_ALLOW_VECTOR_SIZES
422 
423  // TypeChecker is needed for vec(const argTN &... args) ctor to validate args.
424  template <typename T, typename DataT_>
425  struct TypeChecker : std::is_convertible<T, DataT_> {};
426 #define __SYCL_ALLOW_VECTOR_TYPES(num_elements) \
427  template <typename DataT_> \
428  struct TypeChecker<vec<DataT_, num_elements>, DataT_> : std::true_type {}; \
429  template <typename DataT_, typename T2, typename T3, \
430  template <typename> class T4, int... T5> \
431  struct TypeChecker< \
432  detail::SwizzleOp<vec<DataT_, num_elements>, T2, T3, T4, T5...>, DataT_> \
433  : std::true_type {}; \
434  template <typename DataT_, typename T2, typename T3, \
435  template <typename> class T4, int... T5> \
436  struct TypeChecker< \
437  detail::SwizzleOp<const vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
438  DataT_> : std::true_type {};
439 
440  __SYCL_ALLOW_VECTOR_TYPES(1)
441  __SYCL_ALLOW_VECTOR_TYPES(2)
442  __SYCL_ALLOW_VECTOR_TYPES(3)
443  __SYCL_ALLOW_VECTOR_TYPES(4)
444  __SYCL_ALLOW_VECTOR_TYPES(8)
445  __SYCL_ALLOW_VECTOR_TYPES(16)
446 #undef __SYCL_ALLOW_VECTOR_TYPES
447 
448  template <int... Indexes>
449  using Swizzle =
450  detail::SwizzleOp<vec, detail::GetOp<DataT>, detail::GetOp<DataT>,
451  detail::GetOp, Indexes...>;
452 
453  template <int... Indexes>
454  using ConstSwizzle =
455  detail::SwizzleOp<const vec, detail::GetOp<DataT>, detail::GetOp<DataT>,
456  detail::GetOp, Indexes...>;
457 
458  // Shortcuts for args validation in vec(const argTN &... args) ctor.
459  template <typename... argTN>
460  using EnableIfSuitableTypes = typename std::enable_if_t<
461  std::conjunction_v<TypeChecker<argTN, DataT>...>>;
462 
463  template <typename... argTN>
464  using EnableIfSuitableNumElements =
465  typename std::enable_if_t<SizeChecker<0, NumElements, argTN...>::value>;
466 
467  template <size_t... Is>
468  constexpr vec(const std::array<vec_data_t<DataT>, NumElements> &Arr,
469  std::index_sequence<Is...>)
470  : m_Data{([&](vec_data_t<DataT> v) constexpr {
471  if constexpr (std::is_same_v<sycl::ext::oneapi::bfloat16, DataT>)
472  return v.value;
473  else
474  return vec_data_t<DataT>(static_cast<DataT>(v));
475  })(Arr[Is])...} {}
476 
477 public:
478  using element_type = DataT;
479  using value_type = DataT;
480  using rel_t = detail::rel_t<DataT>;
481 #ifdef __SYCL_DEVICE_ONLY__
482  using vector_t =
483  typename detail::VecStorage<DataT, NumElements>::VectorDataType;
484 #endif // __SYCL_DEVICE_ONLY__
485 
486  vec() = default;
487 
488  constexpr vec(const vec &Rhs) = default;
489  constexpr vec(vec &&Rhs) = default;
490 
491  constexpr vec &operator=(const vec &Rhs) = default;
492 
493  // W/o this, things like "vec<char,*> = vec<signed char, *>" doesn't work.
494  template <typename Ty = DataT>
495  typename std::enable_if_t<!std::is_same_v<Ty, rel_t> &&
496  std::is_convertible_v<vec_data_t<Ty>, rel_t>,
497  vec &>
498  operator=(const vec<rel_t, NumElements> &Rhs) {
499  *this = Rhs.template as<vec>();
500  return *this;
501  }
502 
503 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
504  template <typename T = void>
505  using EnableIfNotHostHalf = typename std::enable_if_t<!IsHostHalf, T>;
506 
507  template <typename T = void>
508  using EnableIfHostHalf = typename std::enable_if_t<IsHostHalf, T>;
509 
510  template <typename T = void>
511  using EnableIfUsingArrayOnDevice =
512  typename std::enable_if_t<IsUsingArrayOnDevice, T>;
513 
514  template <typename T = void>
515  using EnableIfNotUsingArrayOnDevice =
516  typename std::enable_if_t<!IsUsingArrayOnDevice, T>;
517 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
518 
519  template <typename T = void>
520  using EnableIfUsingArray =
521  typename std::enable_if_t<IsUsingArrayOnDevice || IsUsingArrayOnHost, T>;
522 
523  template <typename T = void>
524  using EnableIfNotUsingArray =
525  typename std::enable_if_t<!IsUsingArrayOnDevice && !IsUsingArrayOnHost,
526  T>;
527 
528 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
529 
530  template <typename Ty = DataT>
531  explicit constexpr vec(const EnableIfNotUsingArrayOnDevice<Ty> &arg)
532  : m_Data{DataType(vec_data<Ty>::get(arg))} {}
533 
534  template <typename Ty = DataT>
535  typename std::enable_if_t<
536  std::is_fundamental_v<vec_data_t<Ty>> ||
537  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
538  vec &>
539  operator=(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) {
540  m_Data = (DataType)vec_data<Ty>::get(Rhs);
541  return *this;
542  }
543 
544  template <typename Ty = DataT>
545  explicit constexpr vec(const EnableIfUsingArrayOnDevice<Ty> &arg)
546  : vec{detail::RepeatValue<NumElements>(
547  static_cast<vec_data_t<DataT>>(arg)),
548  std::make_index_sequence<NumElements>()} {}
549 
550  template <typename Ty = DataT>
551  typename std::enable_if_t<
552  std::is_fundamental_v<vec_data_t<Ty>> ||
553  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
554  vec &>
555  operator=(const EnableIfUsingArrayOnDevice<Ty> &Rhs) {
556  for (int i = 0; i < NumElements; ++i) {
557  setValue(i, Rhs);
558  }
559  return *this;
560  }
561 #else // __SYCL_USE_EXT_VECTOR_TYPE__
562  explicit constexpr vec(const DataT &arg)
563  : vec{detail::RepeatValue<NumElements>(
564  static_cast<vec_data_t<DataT>>(arg)),
565  std::make_index_sequence<NumElements>()} {}
566 
567  template <typename Ty = DataT>
568  typename std::enable_if_t<
569  std::is_fundamental_v<vec_data_t<Ty>> ||
570  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
571  vec &>
572  operator=(const DataT &Rhs) {
573  for (int i = 0; i < NumElements; ++i) {
574  setValue(i, Rhs);
575  }
576  return *this;
577  }
578 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
579 
580 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
581  // Optimized naive constructors with NumElements of DataT values.
582  // We don't expect compilers to optimize vararg recursive functions well.
583 
584  // Helper type to make specific constructors available only for specific
585  // number of elements.
586  template <int IdxNum, typename T = void>
587  using EnableIfMultipleElems = typename std::enable_if_t<
588  std::is_convertible_v<T, DataT> && NumElements == IdxNum, DataT>;
589  template <typename Ty = DataT>
590  constexpr vec(const EnableIfMultipleElems<2, Ty> Arg0,
591  const EnableIfNotUsingArrayOnDevice<Ty> Arg1)
592  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1)} {}
593  template <typename Ty = DataT>
594  constexpr vec(const EnableIfMultipleElems<3, Ty> Arg0,
595  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2)
596  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
597  vec_data<Ty>::get(Arg2)} {}
598  template <typename Ty = DataT>
599  constexpr vec(const EnableIfMultipleElems<4, Ty> Arg0,
600  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
601  const Ty Arg3)
602  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
603  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3)} {}
604  template <typename Ty = DataT>
605  constexpr vec(const EnableIfMultipleElems<8, Ty> Arg0,
606  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
607  const DataT Arg3, const DataT Arg4, const DataT Arg5,
608  const DataT Arg6, const DataT Arg7)
609  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
610  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3),
611  vec_data<Ty>::get(Arg4), vec_data<Ty>::get(Arg5),
612  vec_data<Ty>::get(Arg6), vec_data<Ty>::get(Arg7)} {}
613  template <typename Ty = DataT>
614  constexpr vec(const EnableIfMultipleElems<16, Ty> Arg0,
615  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
616  const DataT Arg3, const DataT Arg4, const DataT Arg5,
617  const DataT Arg6, const DataT Arg7, const DataT Arg8,
618  const DataT Arg9, const DataT ArgA, const DataT ArgB,
619  const DataT ArgC, const DataT ArgD, const DataT ArgE,
620  const DataT ArgF)
621  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
622  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3),
623  vec_data<Ty>::get(Arg4), vec_data<Ty>::get(Arg5),
624  vec_data<Ty>::get(Arg6), vec_data<Ty>::get(Arg7),
625  vec_data<Ty>::get(Arg8), vec_data<Ty>::get(Arg9),
626  vec_data<Ty>::get(ArgA), vec_data<Ty>::get(ArgB),
627  vec_data<Ty>::get(ArgC), vec_data<Ty>::get(ArgD),
628  vec_data<Ty>::get(ArgE), vec_data<Ty>::get(ArgF)} {}
629 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
630 
631  // Constructor from values of base type or vec of base type. Checks that
632  // base types are match and that the NumElements == sum of lengths of args.
633  template <typename... argTN, typename = EnableIfSuitableTypes<argTN...>,
634  typename = EnableIfSuitableNumElements<argTN...>>
635  constexpr vec(const argTN &...args)
636  : vec{VecArgArrayCreator<vec_data_t<DataT>, argTN...>::Create(args...),
637  std::make_index_sequence<NumElements>()} {}
638 
639 #ifdef __SYCL_DEVICE_ONLY__
640  template <typename vector_t_ = vector_t,
641  typename =
642  typename std::enable_if_t<std::is_same_v<vector_t_, vector_t> &&
643  !std::is_same_v<vector_t_, DataT>>>
644  constexpr vec(vector_t openclVector) {
645  if constexpr (!IsUsingArrayOnDevice) {
646  m_Data = openclVector;
647  } else {
648  m_Data = bit_cast<DataType>(openclVector);
649  }
650  }
651 
652  operator vector_t() const {
653  if constexpr (!IsUsingArrayOnDevice) {
654  return m_Data;
655  } else {
656  auto ptr = bit_cast<const vector_t *>((&m_Data)->data());
657  return *ptr;
658  }
659  }
660 #endif // __SYCL_DEVICE_ONLY__
661 
662  // Available only when: NumElements == 1
663  template <int N = NumElements>
664  operator typename std::enable_if_t<N == 1, DataT>() const {
665  return vec_data<DataT>::get(m_Data);
666  }
667 
668  __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
669  static constexpr size_t get_count() { return size(); }
670  static constexpr size_t size() noexcept { return NumElements; }
671  __SYCL2020_DEPRECATED(
672  "get_size() is deprecated, please use byte_size() instead")
673  static constexpr size_t get_size() { return byte_size(); }
674  static constexpr size_t byte_size() noexcept { return sizeof(m_Data); }
675 
676  // convertImpl can't be called with the same From and To types and therefore
677  // we need this version of convert which is mostly no-op.
678  template <typename convertT,
679  rounding_mode roundingMode = rounding_mode::automatic>
680  std::enable_if_t<
681  std::is_same_v<vec_data_t<DataT>, vec_data_t<convertT>> ||
682  std::is_same_v<detail::ConvertToOpenCLType_t<vec_data_t<DataT>>,
683  detail::ConvertToOpenCLType_t<vec_data_t<convertT>>>,
684  vec<convertT, NumElements>>
685  convert() const {
686  static_assert(std::is_integral_v<vec_data_t<convertT>> ||
687  detail::is_floating_point<convertT>::value,
688  "Unsupported convertT");
689  if constexpr (!std::is_same_v<DataT, convertT>) {
690  // Dummy conversion for cases like vec<signed char> -> vec<char>
691  vec<convertT, NumElements> Result;
692  for (size_t I = 0; I < NumElements; ++I)
693  Result.setValue(I, static_cast<convertT>(getValue(I)));
694 
695  return Result;
696  } else {
697  // No conversion necessary
698  return *this;
699  }
700  }
701 
702  template <typename convertT,
703  rounding_mode roundingMode = rounding_mode::automatic>
704  std::enable_if_t<
705  !std::is_same_v<vec_data_t<DataT>, vec_data_t<convertT>> &&
706  !std::is_same_v<detail::ConvertToOpenCLType_t<vec_data_t<DataT>>,
707  detail::ConvertToOpenCLType_t<vec_data_t<convertT>>>,
708  vec<convertT, NumElements>>
709  convert() const {
710  static_assert(std::is_integral_v<vec_data_t<convertT>> ||
711  detail::is_floating_point<convertT>::value,
712  "Unsupported convertT");
713  using T = vec_data_t<DataT>;
714  using R = vec_data_t<convertT>;
715  using OpenCLT = detail::ConvertToOpenCLType_t<T>;
716  using OpenCLR = detail::ConvertToOpenCLType_t<R>;
717  vec<convertT, NumElements> Result;
718 
719 #if defined(__SYCL_DEVICE_ONLY__)
720  using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
721  using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
722  // Whole vector conversion can only be done, if:
723  constexpr bool canUseNativeVectorConvert =
724 #ifdef __NVPTX__
725  // - we are not on CUDA, see intel/llvm#11840
726  false &&
727 #endif
728  // - both vectors are represented using native vector types;
729  NativeVec && vec<convertT, NumElements>::NativeVec &&
730  // - vec storage has an equivalent OpenCL native vector it is implicitly
731  // convertible to. There are some corner cases where it is not the
732  // case with char, long and long long types.
733  std::is_convertible_v<decltype(m_Data), OpenCLVecT> &&
734  std::is_convertible_v<decltype(Result.m_Data), OpenCLVecR> &&
735  // - it is not a signed to unsigned (or vice versa) conversion
736  // see comments within 'convertImpl' for more details;
737  !detail::is_sint_to_from_uint<T, R>::value &&
738  // - destination type is not bool. bool is stored as integer under the
739  // hood and therefore conversion to bool looks like conversion between
740  // two integer types. Since bit pattern for true and false is not
741  // defined, there is no guarantee that integer conversion yields
742  // right results here;
743  !std::is_same_v<convertT, bool>;
744  if constexpr (canUseNativeVectorConvert) {
745  Result.m_Data = detail::convertImpl<T, R, roundingMode, NumElements,
746  OpenCLVecT, OpenCLVecR>(m_Data);
747  } else
748 #endif // defined(__SYCL_DEVICE_ONLY__)
749  {
750  // Otherwise, we fallback to per-element conversion:
751  for (size_t I = 0; I < NumElements; ++I) {
752  Result.setValue(
753  I, vec_data<convertT>::get(
754  detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
755  vec_data<DataT>::get(getValue(I)))));
756  }
757  }
758 
759  return Result;
760  }
761 
762  template <typename asT> asT as() const {
763  static_assert((sizeof(*this) == sizeof(asT)),
764  "The new SYCL vec type must have the same storage size in "
765  "bytes as this SYCL vec");
766  static_assert(
767  detail::is_contained<asT, detail::gtl::vector_basic_list>::value ||
768  detail::is_contained<asT, detail::gtl::vector_bool_list>::value,
769  "asT must be SYCL vec of a different element type and "
770  "number of elements specified by asT");
771  asT Result;
772  detail::memcpy(&Result.m_Data, &m_Data, sizeof(decltype(Result.m_Data)));
773  return Result;
774  }
775 
776  template <int... SwizzleIndexes> Swizzle<SwizzleIndexes...> swizzle() {
777  return this;
778  }
779 
780  template <int... SwizzleIndexes>
781  ConstSwizzle<SwizzleIndexes...> swizzle() const {
782  return this;
783  }
784 
785  // ext_vector_type is used as an underlying type for sycl::vec on device.
786  // The problem is that for clang vector types the return of operator[] is a
787  // temporary and not a reference to the element in the vector. In practice
788  // reinterpret_cast<DataT *>(&m_Data)[i]; is working. According to
789  // http://llvm.org/docs/GetElementPtr.html#can-gep-index-into-vector-elements
790  // this is not disallowed now. But could probably be disallowed in the future.
791  // That is why tests are added to check that behavior of the compiler has
792  // not changed.
793  //
794  // Implement operator [] in the same way for host and device.
795  // TODO: change host side implementation when underlying type for host side
796  // will be changed to std::array.
797  // NOTE: aliasing the incompatible types of bfloat16 may lead to problems if
798  // aggressively optimized. Specializing with noinline to avoid as workaround.
799 
800  template <typename T = DataT>
801  typename std::enable_if_t<!std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
802  const DataT &>
803  operator[](int i) const {
804  return reinterpret_cast<const DataT *>(&m_Data)[i];
805  }
806 
807  template <typename T = DataT>
808  typename std::enable_if_t<!std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
809  DataT &>
810  operator[](int i) {
811  return reinterpret_cast<DataT *>(&m_Data)[i];
812  }
813 
814 #ifdef _MSC_VER
815 #define __SYCL_NOINLINE_BF16 __declspec(noinline)
816 #else
817 #define __SYCL_NOINLINE_BF16 __attribute__((noinline))
818 #endif
819 
820  template <typename T = DataT>
821  __SYCL_NOINLINE_BF16
822  typename std::enable_if_t<std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
823  const DataT &>
824  operator[](int i) const {
825  return reinterpret_cast<const DataT *>(&m_Data)[i];
826  }
827 
828  template <typename T = DataT>
829  __SYCL_NOINLINE_BF16
830  typename std::enable_if_t<std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
831  DataT &>
832  operator[](int i) {
833  return reinterpret_cast<DataT *>(&m_Data)[i];
834  }
835 
836 #undef __SYCL_NOINLINE_BF16
837 
838  // Begin hi/lo, even/odd, xyzw, and rgba swizzles.
839 private:
840  // Indexer used in the swizzles.def
841  // Currently it is defined as a template struct. Replacing it with a constexpr
842  // function would activate a bug in MSVC that is fixed only in v19.20.
843  // Until then MSVC does not recognize such constexpr functions as const and
844  // thus does not let using them in template parameters inside swizzle.def.
845  template <int Index> struct Indexer {
846  static constexpr int value = Index;
847  };
848 
849 public:
850 #ifdef __SYCL_ACCESS_RETURN
851 #error "Undefine __SYCL_ACCESS_RETURN macro"
852 #endif
853 #define __SYCL_ACCESS_RETURN this
854 #include "swizzles.def"
855 #undef __SYCL_ACCESS_RETURN
856  // End of hi/lo, even/odd, xyzw, and rgba swizzles.
857 
858  template <access::address_space Space, access::decorated DecorateAddress>
859  void load(size_t Offset, multi_ptr<const DataT, Space, DecorateAddress> Ptr) {
860  for (int I = 0; I < NumElements; I++) {
861  setValue(I, *multi_ptr<const DataT, Space, DecorateAddress>(
862  Ptr + Offset * NumElements + I));
863  }
864  }
865  template <access::address_space Space, access::decorated DecorateAddress>
866  void load(size_t Offset, multi_ptr<DataT, Space, DecorateAddress> Ptr) {
867  multi_ptr<const DataT, Space, DecorateAddress> ConstPtr(Ptr);
868  load(Offset, ConstPtr);
869  }
870  template <int Dimensions, access::mode Mode,
871  access::placeholder IsPlaceholder, access::target Target,
872  typename PropertyListT>
873  void
874  load(size_t Offset,
875  accessor<DataT, Dimensions, Mode, Target, IsPlaceholder, PropertyListT>
876  Acc) {
877  multi_ptr<const DataT, detail::TargetToAS<Target>::AS,
878  access::decorated::yes>
879  MultiPtr(Acc);
880  load(Offset, MultiPtr);
881  }
882  template <access::address_space Space, access::decorated DecorateAddress>
883  void store(size_t Offset,
884  multi_ptr<DataT, Space, DecorateAddress> Ptr) const {
885  for (int I = 0; I < NumElements; I++) {
886  *multi_ptr<DataT, Space, DecorateAddress>(Ptr + Offset * NumElements +
887  I) = getValue(I);
888  }
889  }
890  template <int Dimensions, access::mode Mode,
891  access::placeholder IsPlaceholder, access::target Target,
892  typename PropertyListT>
893  void
894  store(size_t Offset,
895  accessor<DataT, Dimensions, Mode, Target, IsPlaceholder, PropertyListT>
896  Acc) {
897  multi_ptr<DataT, detail::TargetToAS<Target>::AS, access::decorated::yes>
898  MultiPtr(Acc);
899  store(Offset, MultiPtr);
900  }
901 
902  void ConvertToDataT() {
903  for (size_t i = 0; i < NumElements; ++i) {
904  DataT tmp = getValue(i);
905  setValue(i, tmp);
906  }
907  }
908 
909 #ifdef __SYCL_BINOP
910 #error "Undefine __SYCL_BINOP macro"
911 #endif
912 
913 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
914 #define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \
915  friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \
916  vec Ret; \
917  if constexpr (IsUsingArrayOnDevice) { \
918  for (size_t I = 0; I < NumElements; ++I) { \
919  Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \
920  } \
921  } else { \
922  Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \
923  if constexpr (std::is_same_v<Type, bool> && CONVERT) { \
924  Ret.ConvertToDataT(); \
925  } \
926  } \
927  return Ret; \
928  } \
929  friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \
930  return Lhs BINOP vec(Rhs); \
931  } \
932  friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \
933  return vec(Lhs) BINOP Rhs; \
934  } \
935  friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \
936  Lhs = Lhs BINOP Rhs; \
937  return Lhs; \
938  } \
939  template <int Num = NumElements> \
940  friend typename std::enable_if_t<Num != 1, vec &> operator OPASSIGN( \
941  vec & Lhs, const DataT & Rhs) { \
942  Lhs = Lhs BINOP vec(Rhs); \
943  return Lhs; \
944  }
945 
946 #else // __SYCL_USE_EXT_VECTOR_TYPE__
947 
948 #define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \
949  friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \
950  vec Ret{}; \
951  if constexpr (NativeVec) \
952  Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \
953  else \
954  for (size_t I = 0; I < NumElements; ++I) \
955  Ret.setValue(I, (DataT)(vec_data<DataT>::get(Lhs.getValue( \
956  I)) BINOP vec_data<DataT>::get(Rhs.getValue(I)))); \
957  return Ret; \
958  } \
959  friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \
960  return Lhs BINOP vec(Rhs); \
961  } \
962  friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \
963  return vec(Lhs) BINOP Rhs; \
964  } \
965  friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \
966  Lhs = Lhs BINOP Rhs; \
967  return Lhs; \
968  } \
969  template <int Num = NumElements> \
970  friend typename std::enable_if_t<Num != 1, vec &> operator OPASSIGN( \
971  vec & Lhs, const DataT & Rhs) { \
972  Lhs = Lhs BINOP vec(Rhs); \
973  return Lhs; \
974  }
975 
976 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
977 
978  __SYCL_BINOP(+, +=, true)
979  __SYCL_BINOP(-, -=, true)
980  __SYCL_BINOP(*, *=, false)
981  __SYCL_BINOP(/, /=, false)
982 
983  // TODO: The following OPs are available only when: DataT != cl_float &&
984  // DataT != cl_double && DataT != cl_half
985  __SYCL_BINOP(%, %=, false)
986  __SYCL_BINOP(|, |=, false)
987  __SYCL_BINOP(&, &=, false)
988  __SYCL_BINOP(^, ^=, false)
989  __SYCL_BINOP(>>, >>=, false)
990  __SYCL_BINOP(<<, <<=, true)
991 #undef __SYCL_BINOP
992 #undef __SYCL_BINOP_HELP
993 
994  // Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic.
995  // As far as CTS validation is concerned, 0/-1 logic also applies when
996  // NumElements is equal to one, which is somewhat inconsistent with being
997  // transparent with scalar data.
998  // TODO: Determine if vec<, NumElements=1> is needed at all, remove this
999  // inconsistency if not by disallowing one-element vectors (as in OpenCL)
1000 
1001 #ifdef __SYCL_RELLOGOP
1002 #error "Undefine __SYCL_RELLOGOP macro"
1003 #endif
1004 // Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
1005 // by SYCL device compiler only.
1006 #ifdef __SYCL_DEVICE_ONLY__
1007 #define __SYCL_RELLOGOP(RELLOGOP) \
1008  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1009  const vec & Rhs) { \
1010  vec<rel_t, NumElements> Ret{}; \
1011  /* This special case is needed since there are no standard operator|| */ \
1012  /* or operator&& functions for std::array. */ \
1013  if constexpr (IsUsingArrayOnDevice && \
1014  (std::string_view(#RELLOGOP) == "||" || \
1015  std::string_view(#RELLOGOP) == "&&")) { \
1016  for (size_t I = 0; I < NumElements; ++I) { \
1017  /* We cannot use SetValue here as the operator is not a friend of*/ \
1018  /* Ret on Windows. */ \
1019  Ret[I] = static_cast<rel_t>(-(vec_data<DataT>::get( \
1020  Lhs.getValue(I)) RELLOGOP vec_data<DataT>::get(Rhs.getValue(I)))); \
1021  } \
1022  } else { \
1023  Ret = vec<rel_t, NumElements>( \
1024  (typename vec<rel_t, NumElements>::vector_t)( \
1025  Lhs.m_Data RELLOGOP Rhs.m_Data)); \
1026  if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \
1027  Ret *= -1; \
1028  } \
1029  return Ret; \
1030  } \
1031  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1032  const DataT & Rhs) { \
1033  return Lhs RELLOGOP vec(Rhs); \
1034  } \
1035  friend vec<rel_t, NumElements> operator RELLOGOP(const DataT & Lhs, \
1036  const vec & Rhs) { \
1037  return vec(Lhs) RELLOGOP Rhs; \
1038  }
1039 
1040 #else
1041 #define __SYCL_RELLOGOP(RELLOGOP) \
1042  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1043  const vec & Rhs) { \
1044  vec<rel_t, NumElements> Ret{}; \
1045  for (size_t I = 0; I < NumElements; ++I) { \
1046  /* We cannot use SetValue here as the operator is not a friend of*/ \
1047  /* Ret on Windows. */ \
1048  Ret[I] = static_cast<rel_t>(-(vec_data<DataT>::get( \
1049  Lhs.getValue(I)) RELLOGOP vec_data<DataT>::get(Rhs.getValue(I)))); \
1050  } \
1051  return Ret; \
1052  } \
1053  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1054  const DataT & Rhs) { \
1055  return Lhs RELLOGOP vec(Rhs); \
1056  } \
1057  friend vec<rel_t, NumElements> operator RELLOGOP(const DataT & Lhs, \
1058  const vec & Rhs) { \
1059  return vec(Lhs) RELLOGOP Rhs; \
1060  }
1061 #endif
1062 
1063  __SYCL_RELLOGOP(==)
1064  __SYCL_RELLOGOP(!=)
1065  __SYCL_RELLOGOP(>)
1066  __SYCL_RELLOGOP(<)
1067  __SYCL_RELLOGOP(>=)
1068  __SYCL_RELLOGOP(<=)
1069  // TODO: limit to integral types.
1070  __SYCL_RELLOGOP(&&)
1071  __SYCL_RELLOGOP(||)
1072 #undef __SYCL_RELLOGOP
1073 
1074 #ifdef __SYCL_UOP
1075 #error "Undefine __SYCL_UOP macro"
1076 #endif
1077 #define __SYCL_UOP(UOP, OPASSIGN) \
1078  friend vec &operator UOP(vec & Rhs) { \
1079  Rhs OPASSIGN vec_data<DataT>::get(1); \
1080  return Rhs; \
1081  } \
1082  friend vec operator UOP(vec &Lhs, int) { \
1083  vec Ret(Lhs); \
1084  Lhs OPASSIGN vec_data<DataT>::get(1); \
1085  return Ret; \
1086  }
1087 
1088  __SYCL_UOP(++, +=)
1089  __SYCL_UOP(--, -=)
1090 #undef __SYCL_UOP
1091 
1092  // operator~() available only when: dataT != float && dataT != double
1093  // && dataT != half
1094  friend vec operator~(const vec &Rhs) {
1095  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1096  vec Ret{};
1097  for (size_t I = 0; I < NumElements; ++I) {
1098  Ret.setValue(I, ~Rhs.getValue(I));
1099  }
1100  return Ret;
1101  } else {
1102  vec Ret{(typename vec::DataType) ~Rhs.m_Data};
1103  if constexpr (std::is_same_v<Type, bool>) {
1104  Ret.ConvertToDataT();
1105  }
1106  return Ret;
1107  }
1108  }
1109 
1110  // operator!
1111  friend vec<detail::rel_t<DataT>, NumElements> operator!(const vec &Rhs) {
1112  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1113  vec Ret{};
1114  for (size_t I = 0; I < NumElements; ++I) {
1115 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1116  // std::byte neither supports ! unary op or casting, so special handling
1117  // is needed. And, worse, Windows has a conflict with 'byte'.
1118  if constexpr (std::is_same_v<std::byte, DataT>) {
1119  Ret.setValue(I, std::byte{!vec_data<DataT>::get(Rhs.getValue(I))});
1120  } else
1121 #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1122  {
1123  Ret.setValue(I, !vec_data<DataT>::get(Rhs.getValue(I)));
1124  }
1125  }
1126  return Ret.template as<vec<detail::rel_t<DataT>, NumElements>>();
1127  } else {
1128  return vec{(typename vec<DataT, NumElements>::DataType) !Rhs.m_Data}
1129  .template as<vec<detail::rel_t<DataT>, NumElements>>();
1130  }
1131  }
1132 
1133  // operator +
1134  friend vec operator+(const vec &Lhs) {
1135  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1136  vec Ret{};
1137  for (size_t I = 0; I < NumElements; ++I)
1138  Ret.setValue(
1139  I, vec_data<DataT>::get(+vec_data<DataT>::get(Lhs.getValue(I))));
1140  return Ret;
1141  } else {
1142  return vec{+Lhs.m_Data};
1143  }
1144  }
1145 
1146  // operator -
1147  friend vec operator-(const vec &Lhs) {
1148  namespace oneapi = sycl::ext::oneapi;
1149  vec Ret{};
1150  if constexpr (IsBfloat16 && NumElements == 1) {
1151  oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data);
1152  oneapi::bfloat16 w = -v;
1153  Ret.m_Data = oneapi::detail::bfloat16ToBits(w);
1154  } else if constexpr (IsBfloat16) {
1155  for (size_t I = 0; I < NumElements; I++) {
1156  oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]);
1157  oneapi::bfloat16 w = -v;
1158  Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w);
1159  }
1160  } else if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1161  for (size_t I = 0; I < NumElements; ++I)
1162  Ret.setValue(
1163  I, vec_data<DataT>::get(-vec_data<DataT>::get(Lhs.getValue(I))));
1164  return Ret;
1165  } else {
1166  Ret = vec{-Lhs.m_Data};
1167  if constexpr (std::is_same_v<Type, bool>) {
1168  Ret.ConvertToDataT();
1169  }
1170  return Ret;
1171  }
1172  }
1173 
1174  // OP is: &&, ||
1175  // vec<RET, NumElements> operatorOP(const vec<DataT, NumElements> &Rhs) const;
1176  // vec<RET, NumElements> operatorOP(const DataT &Rhs) const;
1177 
1178  // OP is: ==, !=, <, >, <=, >=
1179  // vec<RET, NumElements> operatorOP(const vec<DataT, NumElements> &Rhs) const;
1180  // vec<RET, NumElements> operatorOP(const DataT &Rhs) const;
1181 private:
1182  // Generic method that execute "Operation" on underlying values.
1183 
1184 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
1185  template <template <typename> class Operation,
1186  typename Ty = vec<DataT, NumElements>>
1187  vec<DataT, NumElements>
1188  operatorHelper(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) const {
1189  vec<DataT, NumElements> Result;
1190  Operation<DataType> Op;
1191  Result.m_Data = Op(m_Data, Rhs.m_Data);
1192  return Result;
1193  }
1194 
1195  template <template <typename> class Operation,
1196  typename Ty = vec<DataT, NumElements>>
1197  vec<DataT, NumElements>
1198  operatorHelper(const EnableIfUsingArrayOnDevice<Ty> &Rhs) const {
1199  vec<DataT, NumElements> Result;
1200  Operation<DataT> Op;
1201  for (size_t I = 0; I < NumElements; ++I) {
1202  Result.setValue(I, Op(Rhs.getValue(I), getValue(I)));
1203  }
1204  return Result;
1205  }
1206 #else // __SYCL_USE_EXT_VECTOR_TYPE__
1207  template <template <typename> class Operation>
1208  vec<DataT, NumElements>
1209  operatorHelper(const vec<DataT, NumElements> &Rhs) const {
1210  vec<DataT, NumElements> Result;
1211  Operation<DataT> Op;
1212  for (size_t I = 0; I < NumElements; ++I) {
1213  Result.setValue(I, Op(Rhs.getValue(I), getValue(I)));
1214  }
1215  return Result;
1216  }
1217 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
1218 
1219  // setValue and getValue should be able to operate on different underlying
1220  // types: enum cl_float#N , builtin vector float#N, builtin type float.
1221  // These versions are for N > 1.
1222 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
1223  template <int Num = NumElements, typename Ty = int,
1224  typename = typename std::enable_if_t<1 != Num>>
1225  constexpr void setValue(EnableIfNotHostHalf<Ty> Index, const DataT &Value,
1226  int) {
1227  m_Data[Index] = vec_data<DataT>::set(Value);
1228  }
1229 
1230  template <int Num = NumElements, typename Ty = int,
1231  typename = typename std::enable_if_t<1 != Num>>
1232  constexpr DataT getValue(EnableIfNotHostHalf<Ty> Index, int) const {
1233  return vec_data<DataT>::get(m_Data[Index]);
1234  }
1235 
1236  template <int Num = NumElements, typename Ty = int,
1237  typename = typename std::enable_if_t<1 != Num>>
1238  constexpr void setValue(EnableIfHostHalf<Ty> Index, const DataT &Value, int) {
1239  m_Data.s[Index] = vec_data<DataT>::set(Value);
1240  }
1241 
1242  template <int Num = NumElements, typename Ty = int,
1243  typename = typename std::enable_if_t<1 != Num>>
1244  constexpr DataT getValue(EnableIfHostHalf<Ty> Index, int) const {
1245  return vec_data<DataT>::get(m_Data.s[Index]);
1246  }
1247 #else // __SYCL_USE_EXT_VECTOR_TYPE__
1248  template <int Num = NumElements,
1249  typename = typename std::enable_if_t<1 != Num>>
1250  constexpr void setValue(int Index, const DataT &Value, int) {
1251  m_Data[Index] = vec_data<DataT>::set(Value);
1252  }
1253 
1254  template <int Num = NumElements,
1255  typename = typename std::enable_if_t<1 != Num>>
1256  constexpr DataT getValue(int Index, int) const {
1257  return vec_data<DataT>::get(m_Data[Index]);
1258  }
1259 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
1260 
1261  // N==1 versions, used by host and device. Shouldn't trailing type be int?
1262  template <int Num = NumElements,
1263  typename = typename std::enable_if_t<1 == Num>>
1264  constexpr void setValue(int, const DataT &Value, float) {
1265  m_Data = vec_data<DataT>::set(Value);
1266  }
1267 
1268  template <int Num = NumElements,
1269  typename = typename std::enable_if_t<1 == Num>>
1270  DataT getValue(int, float) const {
1271  return vec_data<DataT>::get(m_Data);
1272  }
1273 
1274  // setValue and getValue.
1275  // The "api" functions used by BINOP etc. These versions just dispatch
1276  // using additional int or float arg to disambiguate vec<1> vs. vec<N>
1277  // Special proxies as specialization is not allowed in class scope.
1278  constexpr void setValue(int Index, const DataT &Value) {
1279  if (NumElements == 1)
1280  setValue(Index, Value, 0);
1281  else
1282  setValue(Index, Value, 0.f);
1283  }
1284 
1285  DataT getValue(int Index) const {
1286  return (NumElements == 1) ? getValue(Index, 0) : getValue(Index, 0.f);
1287  }
1288 
1289  // fields
1290 
1291  // Alignment is the same as size, to a maximum size of 64.
1292  // detail::vector_alignment will return that value.
1293  alignas(detail::vector_alignment<DataT, NumElements>::value) DataType m_Data;
1294 
1295  // friends
1296  template <typename T1, typename T2, typename T3, template <typename> class T4,
1297  int... T5>
1298  friend class detail::SwizzleOp;
1299  template <typename T1, int T2> friend class vec;
1300 };
1301 
1302 #ifdef __cpp_deduction_guides
1303 // all compilers supporting deduction guides also support fold expressions
1304 template <class T, class... U,
1305  class = std::enable_if_t<(std::is_same_v<T, U> && ...)>>
1306 vec(T, U...) -> vec<T, sizeof...(U) + 1>;
1307 #endif
1308 
1309 namespace detail {
1310 
1311 // SwizzleOP represents expression templates that operate on vec.
1312 // Actual computation performed on conversion or assignment operators.
1313 template <typename VecT, typename OperationLeftT, typename OperationRightT,
1314  template <typename> class OperationCurrentT, int... Indexes>
1315 class SwizzleOp {
1316  using DataT = typename VecT::element_type;
1317  // Certain operators return a vector with a different element type. Also, the
1318  // left and right operand types may differ. CommonDataT selects a result type
1319  // based on these types to ensure that the result value can be represented.
1320  //
1321  // Example 1:
1322  // sycl::vec<unsigned char, 4> vec{...};
1323  // auto result = 300u + vec.x();
1324  //
1325  // CommonDataT is std::common_type_t<OperationLeftT, OperationRightT> since
1326  // it's larger than unsigned char.
1327  //
1328  // Example 2:
1329  // sycl::vec<bool, 1> vec{...};
1330  // auto result = vec.template swizzle<sycl::elem::s0>() && vec;
1331  //
1332  // CommonDataT is DataT since operator&& returns a vector with element type
1333  // int8_t, which is larger than bool.
1334  //
1335  // Example 3:
1336  // sycl::vec<std::byte, 4> vec{...}; auto swlo = vec.lo();
1337  // auto result = swlo == swlo;
1338  //
1339  // CommonDataT is DataT since operator== returns a vector with element type
1340  // int8_t, which is the same size as std::byte. std::common_type_t<DataT, ...>
1341  // can't be used here since there's no type that int8_t and std::byte can both
1342  // be implicitly converted to.
1343  using OpLeftDataT = typename OperationLeftT::DataT;
1344  using OpRightDataT = typename OperationRightT::DataT;
1345  using CommonDataT = std::conditional_t<
1346  sizeof(DataT) >= sizeof(std::common_type_t<OpLeftDataT, OpRightDataT>),
1347  DataT, std::common_type_t<OpLeftDataT, OpRightDataT>>;
1348  static constexpr int getNumElements() { return sizeof...(Indexes); }
1349 
1350  using rel_t = detail::rel_t<DataT>;
1351  using vec_t = vec<DataT, sizeof...(Indexes)>;
1352  using vec_rel_t = vec<rel_t, sizeof...(Indexes)>;
1353 
1354  template <typename OperationRightT_,
1355  template <typename> class OperationCurrentT_, int... Idx_>
1356  using NewLHOp = SwizzleOp<VecT,
1357  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1358  OperationCurrentT, Indexes...>,
1359  OperationRightT_, OperationCurrentT_, Idx_...>;
1360 
1361  template <typename OperationRightT_,
1362  template <typename> class OperationCurrentT_, int... Idx_>
1363  using NewRelOp = SwizzleOp<vec<rel_t, VecT::getNumElements()>,
1364  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1365  OperationCurrentT, Indexes...>,
1366  OperationRightT_, OperationCurrentT_, Idx_...>;
1367 
1368  template <typename OperationLeftT_,
1369  template <typename> class OperationCurrentT_, int... Idx_>
1370  using NewRHOp = SwizzleOp<VecT, OperationLeftT_,
1371  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1372  OperationCurrentT, Indexes...>,
1373  OperationCurrentT_, Idx_...>;
1374 
1375  template <int IdxNum, typename T = void>
1376  using EnableIfOneIndex = typename std::enable_if_t<
1377  1 == IdxNum && SwizzleOp::getNumElements() == IdxNum, T>;
1378 
1379  template <int IdxNum, typename T = void>
1380  using EnableIfMultipleIndexes = typename std::enable_if_t<
1381  1 != IdxNum && SwizzleOp::getNumElements() == IdxNum, T>;
1382 
1383  template <typename T>
1384  using EnableIfScalarType = typename std::enable_if_t<
1385  std::is_convertible_v<DataT, T> &&
1386  (std::is_fundamental_v<vec_data_t<T>> ||
1387  detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
1388 
1389  template <typename T>
1390  using EnableIfNoScalarType = typename std::enable_if_t<
1391  !std::is_convertible_v<DataT, T> ||
1392  !(std::is_fundamental_v<vec_data_t<T>> ||
1393  detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
1394 
1395  template <int... Indices>
1396  using Swizzle =
1397  SwizzleOp<VecT, GetOp<DataT>, GetOp<DataT>, GetOp, Indices...>;
1398 
1399  template <int... Indices>
1400  using ConstSwizzle =
1401  SwizzleOp<const VecT, GetOp<DataT>, GetOp<DataT>, GetOp, Indices...>;
1402 
1403 public:
1404  using element_type = DataT;
1405  using value_type = DataT;
1406 
1407 #ifdef __SYCL_DEVICE_ONLY__
1408  using vector_t = typename vec_t::vector_t;
1409 #endif // __SYCL_DEVICE_ONLY__
1410 
1411  const DataT &operator[](int i) const {
1412  std::array<int, getNumElements()> Idxs{Indexes...};
1413  return (*m_Vector)[Idxs[i]];
1414  }
1415 
1416  template <typename _T = VecT>
1417  std::enable_if_t<!std::is_const_v<_T>, DataT> &operator[](int i) {
1418  std::array<int, getNumElements()> Idxs{Indexes...};
1419  return (*m_Vector)[Idxs[i]];
1420  }
1421 
1422  __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
1423  size_t get_count() const { return size(); }
1424  static constexpr size_t size() noexcept { return getNumElements(); }
1425 
1426  template <int Num = getNumElements()>
1427  __SYCL2020_DEPRECATED(
1428  "get_size() is deprecated, please use byte_size() instead")
1429  size_t get_size() const {
1430  return byte_size<Num>();
1431  }
1432 
1433  template <int Num = getNumElements()> size_t byte_size() const noexcept {
1434  return sizeof(DataT) * (Num == 3 ? 4 : Num);
1435  }
1436 
1437  template <typename T, int IdxNum = getNumElements(),
1438  typename = EnableIfOneIndex<IdxNum>,
1439  typename = EnableIfScalarType<T>>
1440  operator T() const {
1441  return getValue(0);
1442  }
1443 
1444  template <typename T, typename = EnableIfScalarType<T>>
1445  friend NewRHOp<GetScalarOp<T>, std::multiplies, Indexes...>
1446  operator*(const T &Lhs, const SwizzleOp &Rhs) {
1447  return NewRHOp<GetScalarOp<T>, std::multiplies, Indexes...>(
1448  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1449  }
1450 
1451  template <typename T, typename = EnableIfScalarType<T>>
1452  friend NewRHOp<GetScalarOp<T>, std::plus, Indexes...>
1453  operator+(const T &Lhs, const SwizzleOp &Rhs) {
1454  return NewRHOp<GetScalarOp<T>, std::plus, Indexes...>(
1455  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1456  }
1457 
1458  template <typename T, typename = EnableIfScalarType<T>>
1459  friend NewRHOp<GetScalarOp<T>, std::divides, Indexes...>
1460  operator/(const T &Lhs, const SwizzleOp &Rhs) {
1461  return NewRHOp<GetScalarOp<T>, std::divides, Indexes...>(
1462  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1463  }
1464 
1465  // TODO: Check that Rhs arg is suitable.
1466 #ifdef __SYCL_OPASSIGN
1467 #error "Undefine __SYCL_OPASSIGN macro."
1468 #endif
1469 #define __SYCL_OPASSIGN(OPASSIGN, OP) \
1470  SwizzleOp &operator OPASSIGN(const DataT & Rhs) { \
1471  operatorHelper<OP>(vec_t(Rhs)); \
1472  return *this; \
1473  } \
1474  template <typename RhsOperation> \
1475  SwizzleOp &operator OPASSIGN(const RhsOperation & Rhs) { \
1476  operatorHelper<OP>(Rhs); \
1477  return *this; \
1478  }
1479 
1480  __SYCL_OPASSIGN(+=, std::plus)
1481  __SYCL_OPASSIGN(-=, std::minus)
1482  __SYCL_OPASSIGN(*=, std::multiplies)
1483  __SYCL_OPASSIGN(/=, std::divides)
1484  __SYCL_OPASSIGN(%=, std::modulus)
1485  __SYCL_OPASSIGN(&=, std::bit_and)
1486  __SYCL_OPASSIGN(|=, std::bit_or)
1487  __SYCL_OPASSIGN(^=, std::bit_xor)
1488  __SYCL_OPASSIGN(>>=, RShift)
1489  __SYCL_OPASSIGN(<<=, LShift)
1490 #undef __SYCL_OPASSIGN
1491 
1492 #ifdef __SYCL_UOP
1493 #error "Undefine __SYCL_UOP macro"
1494 #endif
1495 #define __SYCL_UOP(UOP, OPASSIGN) \
1496  SwizzleOp &operator UOP() { \
1497  *this OPASSIGN static_cast<DataT>(1); \
1498  return *this; \
1499  } \
1500  vec_t operator UOP(int) { \
1501  vec_t Ret = *this; \
1502  *this OPASSIGN static_cast<DataT>(1); \
1503  return Ret; \
1504  }
1505 
1506  __SYCL_UOP(++, +=)
1507  __SYCL_UOP(--, -=)
1508 #undef __SYCL_UOP
1509 
1510  template <typename T = DataT>
1511  friend typename std::enable_if_t<
1512  std::is_same_v<T, DataT> && std::is_integral_v<vec_data_t<T>>, vec_t>
1513  operator~(const SwizzleOp &Rhs) {
1514  vec_t Tmp = Rhs;
1515  return ~Tmp;
1516  }
1517 
1518  friend vec_rel_t operator!(const SwizzleOp &Rhs) {
1519  vec_t Tmp = Rhs;
1520  return !Tmp;
1521  }
1522 
1523  friend vec_t operator+(const SwizzleOp &Rhs) {
1524  vec_t Tmp = Rhs;
1525  return +Tmp;
1526  }
1527 
1528  friend vec_t operator-(const SwizzleOp &Rhs) {
1529  vec_t Tmp = Rhs;
1530  return -Tmp;
1531  }
1532 
1533 // scalar BINOP vec<>
1534 // scalar BINOP SwizzleOp
1535 // vec<> BINOP SwizzleOp
1536 #ifdef __SYCL_BINOP
1537 #error "Undefine __SYCL_BINOP macro"
1538 #endif
1539 #define __SYCL_BINOP(BINOP) \
1540  friend vec_t operator BINOP(const DataT &Lhs, const SwizzleOp &Rhs) { \
1541  vec_t Tmp = Rhs; \
1542  return Lhs BINOP Tmp; \
1543  } \
1544  friend vec_t operator BINOP(const SwizzleOp &Lhs, const DataT &Rhs) { \
1545  vec_t Tmp = Lhs; \
1546  return Tmp BINOP Rhs; \
1547  } \
1548  friend vec_t operator BINOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \
1549  vec_t Tmp = Rhs; \
1550  return Lhs BINOP Tmp; \
1551  } \
1552  friend vec_t operator BINOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \
1553  vec_t Tmp = Lhs; \
1554  return Tmp BINOP Rhs; \
1555  }
1556 
1557  __SYCL_BINOP(+)
1558  __SYCL_BINOP(-)
1559  __SYCL_BINOP(*)
1560  __SYCL_BINOP(/)
1561  __SYCL_BINOP(%)
1562  __SYCL_BINOP(&)
1563  __SYCL_BINOP(|)
1564  __SYCL_BINOP(^)
1565  __SYCL_BINOP(>>)
1566  __SYCL_BINOP(<<)
1567 #undef __SYCL_BINOP
1568 
1569 // scalar RELLOGOP vec<>
1570 // scalar RELLOGOP SwizzleOp
1571 // vec<> RELLOGOP SwizzleOp
1572 #ifdef __SYCL_RELLOGOP
1573 #error "Undefine __SYCL_RELLOGOP macro"
1574 #endif
1575 #define __SYCL_RELLOGOP(RELLOGOP) \
1576  friend vec_rel_t operator RELLOGOP(const DataT &Lhs, const SwizzleOp &Rhs) { \
1577  vec_t Tmp = Rhs; \
1578  return Lhs RELLOGOP Tmp; \
1579  } \
1580  friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const DataT &Rhs) { \
1581  vec_t Tmp = Lhs; \
1582  return Tmp RELLOGOP Rhs; \
1583  } \
1584  friend vec_rel_t operator RELLOGOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \
1585  vec_t Tmp = Rhs; \
1586  return Lhs RELLOGOP Tmp; \
1587  } \
1588  friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \
1589  vec_t Tmp = Lhs; \
1590  return Tmp RELLOGOP Rhs; \
1591  }
1592 
1593  __SYCL_RELLOGOP(==)
1594  __SYCL_RELLOGOP(!=)
1595  __SYCL_RELLOGOP(>)
1596  __SYCL_RELLOGOP(<)
1597  __SYCL_RELLOGOP(>=)
1598  __SYCL_RELLOGOP(<=)
1599  // TODO: limit to integral types.
1600  __SYCL_RELLOGOP(&&)
1601  __SYCL_RELLOGOP(||)
1602 #undef __SYCL_RELLOGOP
1603 
1604  template <int IdxNum = getNumElements(),
1605  typename = EnableIfMultipleIndexes<IdxNum>>
1606  SwizzleOp &operator=(const vec<DataT, IdxNum> &Rhs) {
1607  std::array<int, IdxNum> Idxs{Indexes...};
1608  for (size_t I = 0; I < Idxs.size(); ++I) {
1609  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1610  }
1611  return *this;
1612  }
1613 
1614  template <int IdxNum = getNumElements(), typename = EnableIfOneIndex<IdxNum>>
1615  SwizzleOp &operator=(const DataT &Rhs) {
1616  std::array<int, IdxNum> Idxs{Indexes...};
1617  m_Vector->setValue(Idxs[0], Rhs);
1618  return *this;
1619  }
1620 
1621  template <int IdxNum = getNumElements(),
1622  EnableIfMultipleIndexes<IdxNum, bool> = true>
1623  SwizzleOp &operator=(const DataT &Rhs) {
1624  std::array<int, IdxNum> Idxs{Indexes...};
1625  for (auto Idx : Idxs) {
1626  m_Vector->setValue(Idx, Rhs);
1627  }
1628  return *this;
1629  }
1630 
1631  template <int IdxNum = getNumElements(), typename = EnableIfOneIndex<IdxNum>>
1632  SwizzleOp &operator=(DataT &&Rhs) {
1633  std::array<int, IdxNum> Idxs{Indexes...};
1634  m_Vector->setValue(Idxs[0], Rhs);
1635  return *this;
1636  }
1637 
1638  template <typename T, typename = EnableIfScalarType<T>>
1639  NewLHOp<GetScalarOp<T>, std::multiplies, Indexes...>
1640  operator*(const T &Rhs) const {
1641  return NewLHOp<GetScalarOp<T>, std::multiplies, Indexes...>(
1642  m_Vector, *this, GetScalarOp<T>(Rhs));
1643  }
1644 
1645  template <typename RhsOperation,
1646  typename = EnableIfNoScalarType<RhsOperation>>
1647  NewLHOp<RhsOperation, std::multiplies, Indexes...>
1648  operator*(const RhsOperation &Rhs) const {
1649  return NewLHOp<RhsOperation, std::multiplies, Indexes...>(m_Vector, *this,
1650  Rhs);
1651  }
1652 
1653  template <typename T, typename = EnableIfScalarType<T>>
1654  NewLHOp<GetScalarOp<T>, std::plus, Indexes...> operator+(const T &Rhs) const {
1655  return NewLHOp<GetScalarOp<T>, std::plus, Indexes...>(m_Vector, *this,
1656  GetScalarOp<T>(Rhs));
1657  }
1658 
1659  template <typename RhsOperation,
1660  typename = EnableIfNoScalarType<RhsOperation>>
1661  NewLHOp<RhsOperation, std::plus, Indexes...>
1662  operator+(const RhsOperation &Rhs) const {
1663  return NewLHOp<RhsOperation, std::plus, Indexes...>(m_Vector, *this, Rhs);
1664  }
1665 
1666  template <typename T, typename = EnableIfScalarType<T>>
1667  NewLHOp<GetScalarOp<T>, std::minus, Indexes...>
1668  operator-(const T &Rhs) const {
1669  return NewLHOp<GetScalarOp<T>, std::minus, Indexes...>(m_Vector, *this,
1670  GetScalarOp<T>(Rhs));
1671  }
1672 
1673  template <typename RhsOperation,
1674  typename = EnableIfNoScalarType<RhsOperation>>
1675  NewLHOp<RhsOperation, std::minus, Indexes...>
1676  operator-(const RhsOperation &Rhs) const {
1677  return NewLHOp<RhsOperation, std::minus, Indexes...>(m_Vector, *this, Rhs);
1678  }
1679 
1680  template <typename T, typename = EnableIfScalarType<T>>
1681  NewLHOp<GetScalarOp<T>, std::divides, Indexes...>
1682  operator/(const T &Rhs) const {
1683  return NewLHOp<GetScalarOp<T>, std::divides, Indexes...>(
1684  m_Vector, *this, GetScalarOp<T>(Rhs));
1685  }
1686 
1687  template <typename RhsOperation,
1688  typename = EnableIfNoScalarType<RhsOperation>>
1689  NewLHOp<RhsOperation, std::divides, Indexes...>
1690  operator/(const RhsOperation &Rhs) const {
1691  return NewLHOp<RhsOperation, std::divides, Indexes...>(m_Vector, *this,
1692  Rhs);
1693  }
1694 
1695  template <typename T, typename = EnableIfScalarType<T>>
1696  NewLHOp<GetScalarOp<T>, std::modulus, Indexes...>
1697  operator%(const T &Rhs) const {
1698  return NewLHOp<GetScalarOp<T>, std::modulus, Indexes...>(
1699  m_Vector, *this, GetScalarOp<T>(Rhs));
1700  }
1701 
1702  template <typename RhsOperation,
1703  typename = EnableIfNoScalarType<RhsOperation>>
1704  NewLHOp<RhsOperation, std::modulus, Indexes...>
1705  operator%(const RhsOperation &Rhs) const {
1706  return NewLHOp<RhsOperation, std::modulus, Indexes...>(m_Vector, *this,
1707  Rhs);
1708  }
1709 
1710  template <typename T, typename = EnableIfScalarType<T>>
1711  NewLHOp<GetScalarOp<T>, std::bit_and, Indexes...>
1712  operator&(const T &Rhs) const {
1713  return NewLHOp<GetScalarOp<T>, std::bit_and, Indexes...>(
1714  m_Vector, *this, GetScalarOp<T>(Rhs));
1715  }
1716 
1717  template <typename RhsOperation,
1718  typename = EnableIfNoScalarType<RhsOperation>>
1719  NewLHOp<RhsOperation, std::bit_and, Indexes...>
1720  operator&(const RhsOperation &Rhs) const {
1721  return NewLHOp<RhsOperation, std::bit_and, Indexes...>(m_Vector, *this,
1722  Rhs);
1723  }
1724 
1725  template <typename T, typename = EnableIfScalarType<T>>
1726  NewLHOp<GetScalarOp<T>, std::bit_or, Indexes...>
1727  operator|(const T &Rhs) const {
1728  return NewLHOp<GetScalarOp<T>, std::bit_or, Indexes...>(
1729  m_Vector, *this, GetScalarOp<T>(Rhs));
1730  }
1731 
1732  template <typename RhsOperation,
1733  typename = EnableIfNoScalarType<RhsOperation>>
1734  NewLHOp<RhsOperation, std::bit_or, Indexes...>
1735  operator|(const RhsOperation &Rhs) const {
1736  return NewLHOp<RhsOperation, std::bit_or, Indexes...>(m_Vector, *this, Rhs);
1737  }
1738 
1739  template <typename T, typename = EnableIfScalarType<T>>
1740  NewLHOp<GetScalarOp<T>, std::bit_xor, Indexes...>
1741  operator^(const T &Rhs) const {
1742  return NewLHOp<GetScalarOp<T>, std::bit_xor, Indexes...>(
1743  m_Vector, *this, GetScalarOp<T>(Rhs));
1744  }
1745 
1746  template <typename RhsOperation,
1747  typename = EnableIfNoScalarType<RhsOperation>>
1748  NewLHOp<RhsOperation, std::bit_xor, Indexes...>
1749  operator^(const RhsOperation &Rhs) const {
1750  return NewLHOp<RhsOperation, std::bit_xor, Indexes...>(m_Vector, *this,
1751  Rhs);
1752  }
1753 
1754  template <typename T, typename = EnableIfScalarType<T>>
1755  NewLHOp<GetScalarOp<T>, RShift, Indexes...> operator>>(const T &Rhs) const {
1756  return NewLHOp<GetScalarOp<T>, RShift, Indexes...>(m_Vector, *this,
1757  GetScalarOp<T>(Rhs));
1758  }
1759 
1760  template <typename RhsOperation,
1761  typename = EnableIfNoScalarType<RhsOperation>>
1762  NewLHOp<RhsOperation, RShift, Indexes...>
1763  operator>>(const RhsOperation &Rhs) const {
1764  return NewLHOp<RhsOperation, RShift, Indexes...>(m_Vector, *this, Rhs);
1765  }
1766 
1767  template <typename T, typename = EnableIfScalarType<T>>
1768  NewLHOp<GetScalarOp<T>, LShift, Indexes...> operator<<(const T &Rhs) const {
1769  return NewLHOp<GetScalarOp<T>, LShift, Indexes...>(m_Vector, *this,
1770  GetScalarOp<T>(Rhs));
1771  }
1772 
1773  template <typename RhsOperation,
1774  typename = EnableIfNoScalarType<RhsOperation>>
1775  NewLHOp<RhsOperation, LShift, Indexes...>
1776  operator<<(const RhsOperation &Rhs) const {
1777  return NewLHOp<RhsOperation, LShift, Indexes...>(m_Vector, *this, Rhs);
1778  }
1779 
1780  template <
1781  typename T1, typename T2, typename T3, template <typename> class T4,
1782  int... T5,
1783  typename = typename std::enable_if_t<sizeof...(T5) == getNumElements()>>
1784  SwizzleOp &operator=(const SwizzleOp<T1, T2, T3, T4, T5...> &Rhs) {
1785  std::array<int, getNumElements()> Idxs{Indexes...};
1786  for (size_t I = 0; I < Idxs.size(); ++I) {
1787  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1788  }
1789  return *this;
1790  }
1791 
1792  template <
1793  typename T1, typename T2, typename T3, template <typename> class T4,
1794  int... T5,
1795  typename = typename std::enable_if_t<sizeof...(T5) == getNumElements()>>
1796  SwizzleOp &operator=(SwizzleOp<T1, T2, T3, T4, T5...> &&Rhs) {
1797  std::array<int, getNumElements()> Idxs{Indexes...};
1798  for (size_t I = 0; I < Idxs.size(); ++I) {
1799  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1800  }
1801  return *this;
1802  }
1803 
1804  template <typename T, typename = EnableIfScalarType<T>>
1805  NewRelOp<GetScalarOp<T>, EqualTo, Indexes...> operator==(const T &Rhs) const {
1806  return NewRelOp<GetScalarOp<T>, EqualTo, Indexes...>(NULL, *this,
1807  GetScalarOp<T>(Rhs));
1808  }
1809 
1810  template <typename RhsOperation,
1811  typename = EnableIfNoScalarType<RhsOperation>>
1812  NewRelOp<RhsOperation, EqualTo, Indexes...>
1813  operator==(const RhsOperation &Rhs) const {
1814  return NewRelOp<RhsOperation, EqualTo, Indexes...>(NULL, *this, Rhs);
1815  }
1816 
1817  template <typename T, typename = EnableIfScalarType<T>>
1818  NewRelOp<GetScalarOp<T>, NotEqualTo, Indexes...>
1819  operator!=(const T &Rhs) const {
1820  return NewRelOp<GetScalarOp<T>, NotEqualTo, Indexes...>(
1821  NULL, *this, GetScalarOp<T>(Rhs));
1822  }
1823 
1824  template <typename RhsOperation,
1825  typename = EnableIfNoScalarType<RhsOperation>>
1826  NewRelOp<RhsOperation, NotEqualTo, Indexes...>
1827  operator!=(const RhsOperation &Rhs) const {
1828  return NewRelOp<RhsOperation, NotEqualTo, Indexes...>(NULL, *this, Rhs);
1829  }
1830 
1831  template <typename T, typename = EnableIfScalarType<T>>
1832  NewRelOp<GetScalarOp<T>, GreaterEqualTo, Indexes...>
1833  operator>=(const T &Rhs) const {
1834  return NewRelOp<GetScalarOp<T>, GreaterEqualTo, Indexes...>(
1835  NULL, *this, GetScalarOp<T>(Rhs));
1836  }
1837 
1838  template <typename RhsOperation,
1839  typename = EnableIfNoScalarType<RhsOperation>>
1840  NewRelOp<RhsOperation, GreaterEqualTo, Indexes...>
1841  operator>=(const RhsOperation &Rhs) const {
1842  return NewRelOp<RhsOperation, GreaterEqualTo, Indexes...>(NULL, *this, Rhs);
1843  }
1844 
1845  template <typename T, typename = EnableIfScalarType<T>>
1846  NewRelOp<GetScalarOp<T>, LessEqualTo, Indexes...>
1847  operator<=(const T &Rhs) const {
1848  return NewRelOp<GetScalarOp<T>, LessEqualTo, Indexes...>(
1849  NULL, *this, GetScalarOp<T>(Rhs));
1850  }
1851 
1852  template <typename RhsOperation,
1853  typename = EnableIfNoScalarType<RhsOperation>>
1854  NewRelOp<RhsOperation, LessEqualTo, Indexes...>
1855  operator<=(const RhsOperation &Rhs) const {
1856  return NewRelOp<RhsOperation, LessEqualTo, Indexes...>(NULL, *this, Rhs);
1857  }
1858 
1859  template <typename T, typename = EnableIfScalarType<T>>
1860  NewRelOp<GetScalarOp<T>, GreaterThan, Indexes...>
1861  operator>(const T &Rhs) const {
1862  return NewRelOp<GetScalarOp<T>, GreaterThan, Indexes...>(
1863  NULL, *this, GetScalarOp<T>(Rhs));
1864  }
1865 
1866  template <typename RhsOperation,
1867  typename = EnableIfNoScalarType<RhsOperation>>
1868  NewRelOp<RhsOperation, GreaterThan, Indexes...>
1869  operator>(const RhsOperation &Rhs) const {
1870  return NewRelOp<RhsOperation, GreaterThan, Indexes...>(NULL, *this, Rhs);
1871  }
1872 
1873  template <typename T, typename = EnableIfScalarType<T>>
1874  NewRelOp<GetScalarOp<T>, LessThan, Indexes...> operator<(const T &Rhs) const {
1875  return NewRelOp<GetScalarOp<T>, LessThan, Indexes...>(NULL, *this,
1876  GetScalarOp<T>(Rhs));
1877  }
1878 
1879  template <typename RhsOperation,
1880  typename = EnableIfNoScalarType<RhsOperation>>
1881  NewRelOp<RhsOperation, LessThan, Indexes...>
1882  operator<(const RhsOperation &Rhs) const {
1883  return NewRelOp<RhsOperation, LessThan, Indexes...>(NULL, *this, Rhs);
1884  }
1885 
1886  template <typename T, typename = EnableIfScalarType<T>>
1887  NewRelOp<GetScalarOp<T>, LogicalAnd, Indexes...>
1888  operator&&(const T &Rhs) const {
1889  return NewRelOp<GetScalarOp<T>, LogicalAnd, Indexes...>(
1890  NULL, *this, GetScalarOp<T>(Rhs));
1891  }
1892 
1893  template <typename RhsOperation,
1894  typename = EnableIfNoScalarType<RhsOperation>>
1895  NewRelOp<RhsOperation, LogicalAnd, Indexes...>
1896  operator&&(const RhsOperation &Rhs) const {
1897  return NewRelOp<RhsOperation, LogicalAnd, Indexes...>(NULL, *this, Rhs);
1898  }
1899 
1900  template <typename T, typename = EnableIfScalarType<T>>
1901  NewRelOp<GetScalarOp<T>, LogicalOr, Indexes...>
1902  operator||(const T &Rhs) const {
1903  return NewRelOp<GetScalarOp<T>, LogicalOr, Indexes...>(NULL, *this,
1904  GetScalarOp<T>(Rhs));
1905  }
1906 
1907  template <typename RhsOperation,
1908  typename = EnableIfNoScalarType<RhsOperation>>
1909  NewRelOp<RhsOperation, LogicalOr, Indexes...>
1910  operator||(const RhsOperation &Rhs) const {
1911  return NewRelOp<RhsOperation, LogicalOr, Indexes...>(NULL, *this, Rhs);
1912  }
1913 
1914  // Begin hi/lo, even/odd, xyzw, and rgba swizzles.
1915 private:
1916  // Indexer used in the swizzles.def.
1917  // Currently it is defined as a template struct. Replacing it with a constexpr
1918  // function would activate a bug in MSVC that is fixed only in v19.20.
1919  // Until then MSVC does not recognize such constexpr functions as const and
1920  // thus does not let using them in template parameters inside swizzle.def.
1921  template <int Index> struct Indexer {
1922  static constexpr int IDXs[sizeof...(Indexes)] = {Indexes...};
1923  static constexpr int value = IDXs[Index >= getNumElements() ? 0 : Index];
1924  };
1925 
1926 public:
1927 #ifdef __SYCL_ACCESS_RETURN
1928 #error "Undefine __SYCL_ACCESS_RETURN macro"
1929 #endif
1930 #define __SYCL_ACCESS_RETURN m_Vector
1931 #include "swizzles.def"
1932 #undef __SYCL_ACCESS_RETURN
1933  // End of hi/lo, even/odd, xyzw, and rgba swizzles.
1934 
1935  // Leave store() interface to automatic conversion to vec<>.
1936  // Load to vec_t and then assign to swizzle.
1937  template <access::address_space Space, access::decorated DecorateAddress>
1938  void load(size_t offset, multi_ptr<DataT, Space, DecorateAddress> ptr) {
1939  vec_t Tmp;
1940  Tmp.template load(offset, ptr);
1941  *this = Tmp;
1942  }
1943 
1944  template <typename convertT, rounding_mode roundingMode>
1945  vec<convertT, sizeof...(Indexes)> convert() const {
1946  // First materialize the swizzle to vec_t and then apply convert() to it.
1947  vec_t Tmp = *this;
1948  return Tmp.template convert<convertT, roundingMode>();
1949  }
1950 
1951  template <typename asT> asT as() const {
1952  // First materialize the swizzle to vec_t and then apply as() to it.
1953  vec_t Tmp = *this;
1954  static_assert((sizeof(Tmp) == sizeof(asT)),
1955  "The new SYCL vec type must have the same storage size in "
1956  "bytes as this SYCL swizzled vec");
1957  static_assert(
1958  detail::is_contained<asT, detail::gtl::vector_basic_list>::value ||
1959  detail::is_contained<asT, detail::gtl::vector_bool_list>::value,
1960  "asT must be SYCL vec of a different element type and "
1961  "number of elements specified by asT");
1962  return Tmp.template as<asT>();
1963  }
1964 
1965 private:
1966  SwizzleOp(const SwizzleOp &Rhs)
1967  : m_Vector(Rhs.m_Vector), m_LeftOperation(Rhs.m_LeftOperation),
1968  m_RightOperation(Rhs.m_RightOperation) {}
1969 
1970  SwizzleOp(VecT *Vector, OperationLeftT LeftOperation,
1971  OperationRightT RightOperation)
1972  : m_Vector(Vector), m_LeftOperation(LeftOperation),
1973  m_RightOperation(RightOperation) {}
1974 
1975  SwizzleOp(VecT *Vector) : m_Vector(Vector) {}
1976 
1977  SwizzleOp(SwizzleOp &&Rhs)
1978  : m_Vector(Rhs.m_Vector), m_LeftOperation(std::move(Rhs.m_LeftOperation)),
1979  m_RightOperation(std::move(Rhs.m_RightOperation)) {}
1980 
1981  // Either performing CurrentOperation on results of left and right operands
1982  // or reading values from actual vector. Perform implicit type conversion when
1983  // the number of elements == 1
1984 
1985  template <int IdxNum = getNumElements()>
1986  CommonDataT getValue(EnableIfOneIndex<IdxNum, size_t> Index) const {
1987  if (std::is_same_v<OperationCurrentT<DataT>, GetOp<DataT>>) {
1988  std::array<int, getNumElements()> Idxs{Indexes...};
1989  return m_Vector->getValue(Idxs[Index]);
1990  }
1991  auto Op = OperationCurrentT<vec_data_t<CommonDataT>>();
1992  return vec_data<CommonDataT>::get(
1993  Op(vec_data<CommonDataT>::get(m_LeftOperation.getValue(Index)),
1994  vec_data<CommonDataT>::get(m_RightOperation.getValue(Index))));
1995  }
1996 
1997  template <int IdxNum = getNumElements()>
1998  DataT getValue(EnableIfMultipleIndexes<IdxNum, size_t> Index) const {
1999  if (std::is_same_v<OperationCurrentT<DataT>, GetOp<DataT>>) {
2000  std::array<int, getNumElements()> Idxs{Indexes...};
2001  return m_Vector->getValue(Idxs[Index]);
2002  }
2003  auto Op = OperationCurrentT<vec_data_t<DataT>>();
2004  return vec_data<DataT>::get(
2005  Op(vec_data<DataT>::get(m_LeftOperation.getValue(Index)),
2006  vec_data<DataT>::get(m_RightOperation.getValue(Index))));
2007  }
2008 
2009  template <template <typename> class Operation, typename RhsOperation>
2010  void operatorHelper(const RhsOperation &Rhs) {
2011  Operation<vec_data_t<DataT>> Op;
2012  std::array<int, getNumElements()> Idxs{Indexes...};
2013  for (size_t I = 0; I < Idxs.size(); ++I) {
2014  DataT Res = vec_data<DataT>::get(
2015  Op(vec_data<DataT>::get(m_Vector->getValue(Idxs[I])),
2016  vec_data<DataT>::get(Rhs.getValue(I))));
2017  m_Vector->setValue(Idxs[I], Res);
2018  }
2019  }
2020 
2021  // fields
2022  VecT *m_Vector;
2023 
2024  OperationLeftT m_LeftOperation;
2025  OperationRightT m_RightOperation;
2026 
2027  // friends
2028  template <typename T1, int T2> friend class sycl::vec;
2029 
2030  template <typename T1, typename T2, typename T3, template <typename> class T4,
2031  int... T5>
2032  friend class SwizzleOp;
2033 };
2034 } // namespace detail
2035 
2036 namespace detail {
2037 
2038 // Vectors of size 1 are handled separately and therefore 1 is not included in
2039 // the check below.
2040 constexpr bool isValidVectorSize(int N) {
2041  return N == 2 || N == 3 || N == 4 || N == 8 || N == 16;
2042 }
2043 template <typename T, int N, typename V> struct VecStorage {
2044  static_assert(
2045  isValidVectorSize(N) || N == 1,
2046  "Incorrect number of elements for sycl::vec: only 1, 2, 3, 4, 8 "
2047  "or 16 are supported");
2048  static_assert(!std::is_same_v<V, void>, "Incorrect data type for sycl::vec");
2049 };
2050 
2051 #ifdef __SYCL_DEVICE_ONLY__
2052 // device always has ext vector support, but for huge vectors
2053 // we switch to std::array, so that we can use a smaller alignment (64)
2054 // this is to support MSVC, which has a max of 64 for direct params.
2055 template <typename T, int N> struct VecStorageImpl {
2056  static constexpr size_t Num = (N == 3) ? 4 : N;
2057  static constexpr size_t Sz = Num * sizeof(T);
2058  using DataType =
2059  typename std::conditional<Sz <= 64, T __attribute__((ext_vector_type(N))),
2060  std::array<T, Num>>::type;
2061  using VectorDataType = T __attribute__((ext_vector_type(N)));
2062 };
2063 #else // __SYCL_DEVICE_ONLY__
2064 template <typename T, int N> struct VecStorageImpl {
2065  using DataType = std::array<T, (N == 3) ? 4 : N>;
2066 };
2067 #endif // __SYCL_DEVICE_ONLY__
2068 
2069 // Single element bool
2070 template <> struct VecStorage<bool, 1, void> {
2071  using DataType = bool;
2072 #ifdef __SYCL_DEVICE_ONLY__
2073  using VectorDataType = bool;
2074 #endif // __SYCL_DEVICE_ONLY__
2075 };
2076 
2077 // Multiple element bool
2078 template <int N>
2079 struct VecStorage<bool, N, typename std::enable_if_t<isValidVectorSize(N)>> {
2080  using DataType =
2081  typename VecStorageImpl<select_apply_cl_t<bool, std::int8_t, std::int16_t,
2082  std::int32_t, std::int64_t>,
2083  N>::DataType;
2084 #ifdef __SYCL_DEVICE_ONLY__
2085  using VectorDataType =
2086  typename VecStorageImpl<select_apply_cl_t<bool, std::int8_t, std::int16_t,
2087  std::int32_t, std::int64_t>,
2088  N>::VectorDataType;
2089 #endif // __SYCL_DEVICE_ONLY__
2090 };
2091 
2092 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
2093 // Single element byte. Multiple elements will propagate through a later
2094 // specialization.
2095 template <> struct VecStorage<std::byte, 1, void> {
2096  using DataType = std::int8_t;
2097 #ifdef __SYCL_DEVICE_ONLY__
2098  using VectorDataType = std::int8_t;
2099 #endif // __SYCL_DEVICE_ONLY__
2100 };
2101 #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
2102 
2103 // Single element signed integers
2104 template <typename T>
2105 struct VecStorage<T, 1, typename std::enable_if_t<is_sigeninteger_v<T>>> {
2106  using DataType = T;
2107 #ifdef __SYCL_DEVICE_ONLY__
2108  using VectorDataType = DataType;
2109 #endif // __SYCL_DEVICE_ONLY__
2110 };
2111 
2112 // Single element unsigned integers
2113 template <typename T>
2114 struct VecStorage<T, 1, typename std::enable_if_t<is_sugeninteger_v<T>>> {
2115  using DataType = T;
2116 #ifdef __SYCL_DEVICE_ONLY__
2117  using VectorDataType = DataType;
2118 #endif // __SYCL_DEVICE_ONLY__
2119 };
2120 
2121 // Single element floating-point (except half/bfloat16)
2122 template <typename T>
2123 struct VecStorage<
2124  T, 1,
2125  typename std::enable_if_t<!is_half_or_bf16_v<T> && is_sgenfloat_v<T>>> {
2126  using DataType = T;
2127 #ifdef __SYCL_DEVICE_ONLY__
2128  using VectorDataType = DataType;
2129 #endif // __SYCL_DEVICE_ONLY__
2130 };
2131 // Multiple elements signed/unsigned integers and floating-point (except
2132 // half/bfloat16)
2133 template <typename T, int N>
2134 struct VecStorage<
2135  T, N,
2136  typename std::enable_if_t<isValidVectorSize(N) &&
2137  (is_sgeninteger_v<T> ||
2138  (is_sgenfloat_v<T> && !is_half_or_bf16_v<T>))>> {
2139  using DataType =
2140  typename VecStorageImpl<typename VecStorage<T, 1>::DataType, N>::DataType;
2141 #ifdef __SYCL_DEVICE_ONLY__
2142  using VectorDataType =
2143  typename VecStorageImpl<typename VecStorage<T, 1>::DataType,
2144  N>::VectorDataType;
2145 #endif // __SYCL_DEVICE_ONLY__
2146 };
2147 
2148 // Single element half
2149 template <> struct VecStorage<half, 1, void> {
2150  using DataType = sycl::detail::half_impl::StorageT;
2151 #ifdef __SYCL_DEVICE_ONLY__
2152  using VectorDataType = sycl::detail::half_impl::StorageT;
2153 #endif // __SYCL_DEVICE_ONLY__
2154 };
2155 
2156 // Multiple elements half
2157 #if defined(__SYCL_DEVICE_ONLY__)
2158 #define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \
2159  template <> struct VecStorage<half, Num, void> { \
2160  using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \
2161  using VectorDataType = sycl::detail::half_impl::Vec##Num##StorageT; \
2162  };
2163 #else // defined(__SYCL_DEVICE_ONLY__)
2164 #define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \
2165  template <> struct VecStorage<half, Num, void> { \
2166  using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \
2167  };
2168 #endif // defined(__SYCL_DEVICE_ONLY__)
2169 
2170 __SYCL_DEFINE_HALF_VECSTORAGE(2)
2171 __SYCL_DEFINE_HALF_VECSTORAGE(3)
2172 __SYCL_DEFINE_HALF_VECSTORAGE(4)
2173 __SYCL_DEFINE_HALF_VECSTORAGE(8)
2174 __SYCL_DEFINE_HALF_VECSTORAGE(16)
2175 #undef __SYCL_DEFINE_HALF_VECSTORAGE
2176 
2177 // Single element bfloat16
2178 template <> struct VecStorage<sycl::ext::oneapi::bfloat16, 1, void> {
2179  using DataType = sycl::ext::oneapi::detail::Bfloat16StorageT;
2180  // using VectorDataType = sycl::ext::oneapi::bfloat16;
2181  using VectorDataType = sycl::ext::oneapi::detail::Bfloat16StorageT;
2182 };
2183 // Multiple elements bfloat16
2184 #define __SYCL_DEFINE_BF16_VECSTORAGE(Num) \
2185  template <> struct VecStorage<sycl::ext::oneapi::bfloat16, Num, void> { \
2186  using DataType = sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \
2187  using VectorDataType = \
2188  sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \
2189  };
2190 __SYCL_DEFINE_BF16_VECSTORAGE(2)
2191 __SYCL_DEFINE_BF16_VECSTORAGE(3)
2192 __SYCL_DEFINE_BF16_VECSTORAGE(4)
2193 __SYCL_DEFINE_BF16_VECSTORAGE(8)
2194 __SYCL_DEFINE_BF16_VECSTORAGE(16)
2195 #undef __SYCL_DEFINE_BF16_VECSTORAGE
2196 } // namespace detail
2197 
2201 #define SYCL_DEVICE_COPYABLE 1
2202 
2210 template <typename T> struct is_device_copyable;
2211 
2212 namespace detail {
2213 template <typename T, typename = void>
2214 struct is_device_copyable_impl : std::is_trivially_copyable<T> {};
2215 
2216 template <typename T>
2217 struct is_device_copyable_impl<
2218  T, std::enable_if_t<!std::is_same_v<T, std::remove_cv_t<T>>>>
2219  // Cannot express this "recursion" (to take user's partial non-cv
2220  // specializations into account) without this helper struct.
2221  : is_device_copyable<std::remove_cv_t<T>> {};
2222 } // namespace detail
2223 
2224 template <typename T>
2225 struct is_device_copyable : detail::is_device_copyable_impl<T> {};
2226 
2227 // std::array<T, 0> is implicitly device copyable type.
2228 template <typename T>
2229 struct is_device_copyable<std::array<T, 0>> : std::true_type {};
2230 
2231 // std::array<T, N> is implicitly device copyable type if T is device copyable.
2232 template <typename T, std::size_t N>
2233 struct is_device_copyable<std::array<T, N>> : is_device_copyable<T> {};
2234 
2235 // std::optional<T> is implicitly device copyable type if T is device copyable.
2236 template <typename T>
2237 struct is_device_copyable<std::optional<T>> : is_device_copyable<T> {};
2238 
2239 // std::pair<T1, T2> is implicitly device copyable type if T1 and T2 are device
2240 // copyable.
2241 template <typename T1, typename T2>
2242 struct is_device_copyable<std::pair<T1, T2>>
2243  : std::bool_constant<is_device_copyable<T1>::value &&
2244  is_device_copyable<T2>::value> {};
2245 
2246 // std::tuple<Ts...> is implicitly device copyable type if each type T of Ts...
2247 // is device copyable.
2248 template <typename... Ts>
2249 struct is_device_copyable<std::tuple<Ts...>>
2250  : std::bool_constant<(... && is_device_copyable<Ts>::value)> {};
2251 
2252 // std::variant<Ts...> is implicitly device copyable type if each type T of
2253 // Ts... is device copyable.
2254 template <typename... Ts>
2255 struct is_device_copyable<std::variant<Ts...>>
2256  : std::bool_constant<(... && is_device_copyable<Ts>::value)> {};
2257 
2258 // marray is device copyable if element type is device copyable.
2259 template <typename T, std::size_t N>
2260 struct is_device_copyable<sycl::marray<T, N>> : is_device_copyable<T> {};
2261 
2262 // array is device copyable if element type is device copyable.
2263 template <typename T, std::size_t N>
2264 struct is_device_copyable<T[N]> : is_device_copyable<T> {};
2265 
2266 template <typename T>
2267 inline constexpr bool is_device_copyable_v = is_device_copyable<T>::value;
2268 
2269 namespace detail {
2270 template <typename T, typename = void>
2271 struct IsDeprecatedDeviceCopyable : std::false_type {};
2272 
2273 // TODO: using C++ attribute [[deprecated]] or the macro __SYCL2020_DEPRECATED
2274 // does not produce expected warning message for the type 'T'.
2275 template <typename T>
2276 struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020")
2277  IsDeprecatedDeviceCopyable<
2278  T, std::enable_if_t<std::is_trivially_copy_constructible_v<T> &&
2279  std::is_trivially_destructible_v<T> &&
2280  !is_device_copyable_v<T>>> : std::true_type {};
2281 
2282 template <typename T, int N>
2283 struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020")
2284  IsDeprecatedDeviceCopyable<T[N]> : IsDeprecatedDeviceCopyable<T> {};
2285 
2286 #ifdef __SYCL_DEVICE_ONLY__
2287 // Checks that the fields of the type T with indices 0 to (NumFieldsToCheck -
2288 // 1) are device copyable.
2289 template <typename T, unsigned NumFieldsToCheck>
2290 struct CheckFieldsAreDeviceCopyable
2291  : CheckFieldsAreDeviceCopyable<T, NumFieldsToCheck - 1> {
2292  using FieldT = decltype(__builtin_field_type(T, NumFieldsToCheck - 1));
2293  static_assert(is_device_copyable_v<FieldT> ||
2294  detail::IsDeprecatedDeviceCopyable<FieldT>::value,
2295  "The specified type is not device copyable");
2296 };
2297 
2298 template <typename T> struct CheckFieldsAreDeviceCopyable<T, 0> {};
2299 
2300 // Checks that the base classes of the type T with indices 0 to
2301 // (NumFieldsToCheck - 1) are device copyable.
2302 template <typename T, unsigned NumBasesToCheck>
2303 struct CheckBasesAreDeviceCopyable
2304  : CheckBasesAreDeviceCopyable<T, NumBasesToCheck - 1> {
2305  using BaseT = decltype(__builtin_base_type(T, NumBasesToCheck - 1));
2306  static_assert(is_device_copyable_v<BaseT> ||
2307  detail::IsDeprecatedDeviceCopyable<BaseT>::value,
2308  "The specified type is not device copyable");
2309 };
2310 
2311 template <typename T> struct CheckBasesAreDeviceCopyable<T, 0> {};
2312 
2313 // All the captures of a lambda or functor of type FuncT passed to a kernel
2314 // must be is_device_copyable, which extends to bases and fields of FuncT.
2315 // Fields are captures of lambda/functors and bases are possible base classes
2316 // of functors also allowed by SYCL.
2317 // The SYCL-2020 implementation must check each of the fields & bases of the
2318 // type FuncT, only one level deep, which is enough to see if they are all
2319 // device copyable by using the result of is_device_copyable returned for them.
2320 // At this moment though the check also allowes using types for which
2321 // (is_trivially_copy_constructible && is_trivially_destructible) returns true
2322 // and (is_device_copyable) returns false. That is the deprecated behavior and
2323 // is currently/temporarily supported only to not break older SYCL programs.
2324 template <typename FuncT>
2325 struct CheckDeviceCopyable
2326  : CheckFieldsAreDeviceCopyable<FuncT, __builtin_num_fields(FuncT)>,
2327  CheckBasesAreDeviceCopyable<FuncT, __builtin_num_bases(FuncT)> {};
2328 
2329 // Below are two specializations for CheckDeviceCopyable when a kernel lambda
2330 // is wrapped after range rounding optimization.
2331 template <typename TransformedArgType, int Dims, typename KernelType>
2332 struct CheckDeviceCopyable<
2333  RoundedRangeKernel<TransformedArgType, Dims, KernelType>>
2334  : CheckDeviceCopyable<KernelType> {};
2335 
2336 template <typename TransformedArgType, int Dims, typename KernelType>
2337 struct CheckDeviceCopyable<
2338  RoundedRangeKernelWithKH<TransformedArgType, Dims, KernelType>>
2339  : CheckDeviceCopyable<KernelType> {};
2340 
2341 #endif // __SYCL_DEVICE_ONLY__
2342 } // namespace detail
2343 
2344 } // namespace _V1
2345 } // namespace sycl
DataT operator()(DataT, DataT)
Definition: types.hpp:191
DataT getValue(size_t) const
Definition: types.hpp:190
DataT getValue(size_t) const
Definition: types.hpp:201
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: types.hpp:284
detail::host_half_impl::half StorageT
Definition: half_type.hpp:253
std::conditional_t< sizeof(_IN)==1, T8, std::conditional_t< sizeof(_IN)==2, T16, std::conditional_t< sizeof(_IN)==4, T32, T64 > >> select_apply_cl_t
Definition: types.hpp:104
typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_char), opencl::cl_char, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_short), opencl::cl_short, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_int), opencl::cl_int, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_long), opencl::cl_long, bool > >> > rel_t
Definition: types.hpp:183
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
constexpr size_t MaxVecAlignment
std::int32_t cl_int
Definition: aliases.hpp:134
std::int8_t cl_char
Definition: aliases.hpp:130
std::int16_t cl_short
Definition: aliases.hpp:132
std::int64_t cl_long
Definition: aliases.hpp:136
unsigned char byte
Definition: image.hpp:107
typename detail::vec_helper< T >::RetType vec_data_t
Definition: types.hpp:278
detail::vec_helper< T > vec_data
Definition: types.hpp:275
Definition: access.hpp:18
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:208
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:220
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:232
constexpr T operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:262
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:226
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:238
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:244
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:250
constexpr rel_t< T > operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:214
constexpr T operator()(const T &Lhs, const T &Rhs) const
Definition: types.hpp:256
static constexpr RetType set(bool value)
Definition: types.hpp:115
select_apply_cl_t< bool, std::int8_t, std::int16_t, std::int32_t, std::int64_t > RetType
Definition: types.hpp:113
static constexpr RetType get(bool value)
Definition: types.hpp:114
static constexpr RetType get(std::byte value)
Definition: types.hpp:156
static constexpr RetType set(std::byte value)
Definition: types.hpp:157
static constexpr std::byte get(std::uint8_t value)
Definition: types.hpp:158
static constexpr std::byte set(std::uint8_t value)
Definition: types.hpp:161
static constexpr RetType get(BFloat16StorageT value)
Definition: types.hpp:121
static constexpr BFloat16StorageT set(RetType value)
Definition: types.hpp:139
static constexpr RetType get(RetType value)
Definition: types.hpp:137
sycl::ext::oneapi::detail::Bfloat16StorageT BFloat16StorageT
Definition: types.hpp:120
static constexpr RetType set(T value)
Definition: types.hpp:109
static constexpr RetType get(T value)
Definition: types.hpp:108
static constexpr int g
Definition: types.hpp:76
static constexpr int s4
Definition: types.hpp:83
static constexpr int s9
Definition: types.hpp:88
static constexpr int s5
Definition: types.hpp:84
static constexpr int s1
Definition: types.hpp:80
static constexpr int s3
Definition: types.hpp:82
static constexpr int s0
Definition: types.hpp:79
static constexpr int sF
Definition: types.hpp:94
static constexpr int s7
Definition: types.hpp:86
static constexpr int x
Definition: types.hpp:71
static constexpr int z
Definition: types.hpp:73
static constexpr int s8
Definition: types.hpp:87
static constexpr int a
Definition: types.hpp:78
static constexpr int sD
Definition: types.hpp:92
static constexpr int y
Definition: types.hpp:72
static constexpr int w
Definition: types.hpp:74
static constexpr int sC
Definition: types.hpp:91
static constexpr int sB
Definition: types.hpp:90
static constexpr int sA
Definition: types.hpp:89
static constexpr int s2
Definition: types.hpp:81
static constexpr int b
Definition: types.hpp:77
static constexpr int r
Definition: types.hpp:75
static constexpr int sE
Definition: types.hpp:93
static constexpr int s6
Definition: types.hpp:85
Implementation of vec::convert.