DPC++ Runtime
Runtime libraries for oneAPI DPC++
vector.hpp
Go to the documentation of this file.
1 //==---------------- vector.hpp --- Implements sycl::vec -------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 // Check if Clang's ext_vector_type attribute is available. Host compiler
12 // may not be Clang, and Clang may not be built with the extension.
13 #ifdef __clang__
14 #ifndef __has_extension
15 #define __has_extension(x) 0
16 #endif
17 #ifdef __HAS_EXT_VECTOR_TYPE__
18 #error "Undefine __HAS_EXT_VECTOR_TYPE__ macro"
19 #endif
20 #if __has_extension(attribute_ext_vector_type)
21 #define __HAS_EXT_VECTOR_TYPE__
22 #endif
23 #endif // __clang__
24 
25 #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
26 #error "SYCL device compiler is built without ext_vector_type support"
27 #endif
28 
29 #if defined(__SYCL_DEVICE_ONLY__)
30 #define __SYCL_USE_EXT_VECTOR_TYPE__
31 #endif
32 
33 #include <sycl/access/access.hpp> // for decorated, address_space
34 #include <sycl/aliases.hpp> // for half, cl_char, cl_int
35 #include <sycl/detail/common.hpp> // for ArrayCreator, RepeatV...
36 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
37 #include <sycl/detail/generic_type_lists.hpp> // for vector_basic_list
38 #include <sycl/detail/generic_type_traits.hpp> // for is_sigeninteger, is_s...
39 #include <sycl/detail/memcpy.hpp> // for memcpy
40 #include <sycl/detail/type_list.hpp> // for is_contained
41 #include <sycl/detail/type_traits.hpp> // for is_floating_point
42 #include <sycl/detail/vector_convert.hpp> // for convertImpl
43 #include <sycl/detail/vector_traits.hpp> // for vector_alignment
44 #include <sycl/half_type.hpp> // for StorageT, half, Vec16...
45 
46 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
47 
48 #include <array> // for array
49 #include <assert.h> // for assert
50 #include <cstddef> // for size_t, NULL, byte
51 #include <cstdint> // for uint8_t, int16_t, int...
52 #include <functional> // for divides, multiplies
53 #include <iterator> // for pair
54 #include <ostream> // for operator<<, basic_ost...
55 #include <type_traits> // for enable_if_t, is_same
56 #include <utility> // for index_sequence, make_...
57 
58 namespace sycl {
59 inline namespace _V1 {
60 
61 struct elem {
62  static constexpr int x = 0;
63  static constexpr int y = 1;
64  static constexpr int z = 2;
65  static constexpr int w = 3;
66  static constexpr int r = 0;
67  static constexpr int g = 1;
68  static constexpr int b = 2;
69  static constexpr int a = 3;
70  static constexpr int s0 = 0;
71  static constexpr int s1 = 1;
72  static constexpr int s2 = 2;
73  static constexpr int s3 = 3;
74  static constexpr int s4 = 4;
75  static constexpr int s5 = 5;
76  static constexpr int s6 = 6;
77  static constexpr int s7 = 7;
78  static constexpr int s8 = 8;
79  static constexpr int s9 = 9;
80  static constexpr int sA = 10;
81  static constexpr int sB = 11;
82  static constexpr int sC = 12;
83  static constexpr int sD = 13;
84  static constexpr int sE = 14;
85  static constexpr int sF = 15;
86 };
87 
88 namespace detail {
89 // select_apply_cl_t selects from T8/T16/T32/T64 basing on
90 // sizeof(_IN). expected to handle scalar types in _IN.
91 template <typename _IN, typename T8, typename T16, typename T32, typename T64>
92 using select_apply_cl_t = std::conditional_t<
93  sizeof(_IN) == 1, T8,
94  std::conditional_t<sizeof(_IN) == 2, T16,
95  std::conditional_t<sizeof(_IN) == 4, T32, T64>>>;
96 
97 template <typename T> struct vec_helper {
98  using RetType = T;
99  static constexpr RetType get(T value) { return value; }
100  static constexpr RetType set(T value) { return value; }
101 };
102 template <> struct vec_helper<bool> {
103  using RetType = select_apply_cl_t<bool, std::int8_t, std::int16_t,
104  std::int32_t, std::int64_t>;
105  static constexpr RetType get(bool value) { return value; }
106  static constexpr RetType set(bool value) { return value; }
107 };
108 
109 template <> struct vec_helper<sycl::ext::oneapi::bfloat16> {
112  static constexpr RetType get(BFloat16StorageT value) {
113 #if defined(__SYCL_BITCAST_IS_CONSTEXPR)
114  return sycl::bit_cast<RetType>(value);
115 #else
116  // awkward workaround. sycl::bit_cast isn't constexpr in older GCC
117  // C++20 will give us both std::bit_cast and constexpr reinterpet for void*
118  // but neither available yet.
119  union {
122  } result = {};
123  result.storage = value;
124  return result.bf16;
125 #endif
126  }
127 
128  static constexpr RetType get(RetType value) { return value; }
129 
130  static constexpr BFloat16StorageT set(RetType value) {
131 #if defined(__SYCL_BITCAST_IS_CONSTEXPR)
132  return sycl::bit_cast<BFloat16StorageT>(value);
133 #else
134  union {
137  } result = {};
138  result.bf16 = value;
139  return result.storage;
140 #endif
141  }
142 };
143 
144 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
145 template <> struct vec_helper<std::byte> {
146  using RetType = std::uint8_t;
147  static constexpr RetType get(std::byte value) { return (RetType)value; }
148  static constexpr RetType set(std::byte value) { return (RetType)value; }
149  static constexpr std::byte get(std::uint8_t value) {
150  return (std::byte)value;
151  }
152  static constexpr std::byte set(std::uint8_t value) {
153  return (std::byte)value;
154  }
155 };
156 #endif
157 
158 template <typename VecT, typename OperationLeftT, typename OperationRightT,
159  template <typename> class OperationCurrentT, int... Indexes>
160 class SwizzleOp;
161 
162 template <typename T, int N, typename V = void> struct VecStorage;
163 
164 // Element type for relational operator return value.
165 template <typename DataT>
166 using rel_t = typename std::conditional_t<
167  sizeof(DataT) == sizeof(opencl::cl_char), opencl::cl_char,
168  typename std::conditional_t<
169  sizeof(DataT) == sizeof(opencl::cl_short), opencl::cl_short,
170  typename std::conditional_t<
171  sizeof(DataT) == sizeof(opencl::cl_int), opencl::cl_int,
172  typename std::conditional_t<sizeof(DataT) ==
173  sizeof(opencl::cl_long),
174  opencl::cl_long, bool>>>>;
175 
176 // Special type indicating that SwizzleOp should just read value from vector -
177 // not trying to perform any operations. Should not be called.
178 template <typename T> class GetOp {
179 public:
180  using DataT = T;
181  DataT getValue(size_t) const { return (DataT)0; }
182  DataT operator()(DataT, DataT) { return (DataT)0; }
183 };
184 
185 // Forward declarations
186 template <typename TransformedArgType, int Dims, typename KernelType>
187 class RoundedRangeKernel;
188 template <typename TransformedArgType, int Dims, typename KernelType>
189 class RoundedRangeKernelWithKH;
190 
191 // Vectors of size 1 are handled separately and therefore 1 is not included in
192 // the check below.
193 constexpr bool isValidVectorSize(int N) {
194  return N == 2 || N == 3 || N == 4 || N == 8 || N == 16;
195 }
196 template <typename T, int N, typename V> struct VecStorage {
197  static_assert(
198  isValidVectorSize(N) || N == 1,
199  "Incorrect number of elements for sycl::vec: only 1, 2, 3, 4, 8 "
200  "or 16 are supported");
201  static_assert(!std::is_same_v<V, void>, "Incorrect data type for sycl::vec");
202 };
203 
204 #ifdef __SYCL_DEVICE_ONLY__
205 // device always has ext vector support, but for huge vectors
206 // we switch to std::array, so that we can use a smaller alignment (64)
207 // this is to support MSVC, which has a max of 64 for direct params.
208 template <typename T, int N> struct VecStorageImpl {
209  static constexpr size_t Num = (N == 3) ? 4 : N;
210  static constexpr size_t Sz = Num * sizeof(T);
211  using DataType =
212  typename std::conditional<Sz <= 64, T __attribute__((ext_vector_type(N))),
213  std::array<T, Num>>::type;
214  using VectorDataType = T __attribute__((ext_vector_type(N)));
215 };
216 #else // __SYCL_DEVICE_ONLY__
217 template <typename T, int N> struct VecStorageImpl {
218  using DataType = std::array<T, (N == 3) ? 4 : N>;
219 };
220 #endif // __SYCL_DEVICE_ONLY__
221 
222 // Single element bool
223 template <> struct VecStorage<bool, 1, void> {
224  using DataType = bool;
225 #ifdef __SYCL_DEVICE_ONLY__
226  using VectorDataType = bool;
227 #endif // __SYCL_DEVICE_ONLY__
228 };
229 
230 // Multiple element bool
231 template <int N>
232 struct VecStorage<bool, N, typename std::enable_if_t<isValidVectorSize(N)>> {
233  using DataType =
234  typename VecStorageImpl<select_apply_cl_t<bool, std::int8_t, std::int16_t,
235  std::int32_t, std::int64_t>,
236  N>::DataType;
237 #ifdef __SYCL_DEVICE_ONLY__
238  using VectorDataType =
239  typename VecStorageImpl<select_apply_cl_t<bool, std::int8_t, std::int16_t,
240  std::int32_t, std::int64_t>,
241  N>::VectorDataType;
242 #endif // __SYCL_DEVICE_ONLY__
243 };
244 
245 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
246 // Single element byte. Multiple elements will propagate through a later
247 // specialization.
248 template <> struct VecStorage<std::byte, 1, void> {
249  using DataType = std::int8_t;
250 #ifdef __SYCL_DEVICE_ONLY__
251  using VectorDataType = std::int8_t;
252 #endif // __SYCL_DEVICE_ONLY__
253 };
254 #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
255 
256 // Single element signed integers
257 template <typename T>
258 struct VecStorage<T, 1, typename std::enable_if_t<is_sigeninteger_v<T>>> {
259  using DataType = T;
260 #ifdef __SYCL_DEVICE_ONLY__
261  using VectorDataType = DataType;
262 #endif // __SYCL_DEVICE_ONLY__
263 };
264 
265 // Single element unsigned integers
266 template <typename T>
267 struct VecStorage<T, 1, typename std::enable_if_t<is_sugeninteger_v<T>>> {
268  using DataType = T;
269 #ifdef __SYCL_DEVICE_ONLY__
270  using VectorDataType = DataType;
271 #endif // __SYCL_DEVICE_ONLY__
272 };
273 
274 // Single element floating-point (except half/bfloat16)
275 template <typename T>
276 struct VecStorage<
277  T, 1,
278  typename std::enable_if_t<!is_half_or_bf16_v<T> && is_sgenfloat_v<T>>> {
279  using DataType = T;
280 #ifdef __SYCL_DEVICE_ONLY__
281  using VectorDataType = DataType;
282 #endif // __SYCL_DEVICE_ONLY__
283 };
284 // Multiple elements signed/unsigned integers and floating-point (except
285 // half/bfloat16)
286 template <typename T, int N>
287 struct VecStorage<
288  T, N,
289  typename std::enable_if_t<isValidVectorSize(N) &&
290  (is_sgeninteger_v<T> ||
291  (is_sgenfloat_v<T> && !is_half_or_bf16_v<T>))>> {
292  using DataType =
294 #ifdef __SYCL_DEVICE_ONLY__
295  using VectorDataType =
297  N>::VectorDataType;
298 #endif // __SYCL_DEVICE_ONLY__
299 };
300 
301 // Single element half
302 template <> struct VecStorage<half, 1, void> {
304 #ifdef __SYCL_DEVICE_ONLY__
305  using VectorDataType = sycl::detail::half_impl::VecElemT;
306 #endif // __SYCL_DEVICE_ONLY__
307 };
308 
309 // Multiple elements half
310 #if defined(__SYCL_DEVICE_ONLY__)
311 #define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \
312  template <> struct VecStorage<half, Num, void> { \
313  using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \
314  using VectorDataType = sycl::detail::half_impl::Vec##Num##StorageT; \
315  };
316 #else // defined(__SYCL_DEVICE_ONLY__)
317 #define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \
318  template <> struct VecStorage<half, Num, void> { \
319  using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \
320  };
321 #endif // defined(__SYCL_DEVICE_ONLY__)
322 
328 #undef __SYCL_DEFINE_HALF_VECSTORAGE
329 
330 // Single element bfloat16
331 template <> struct VecStorage<sycl::ext::oneapi::bfloat16, 1, void> {
333  // using VectorDataType = sycl::ext::oneapi::bfloat16;
335 };
336 // Multiple elements bfloat16
337 #define __SYCL_DEFINE_BF16_VECSTORAGE(Num) \
338  template <> struct VecStorage<sycl::ext::oneapi::bfloat16, Num, void> { \
339  using DataType = sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \
340  using VectorDataType = \
341  sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \
342  };
348 #undef __SYCL_DEFINE_BF16_VECSTORAGE
349 } // namespace detail
350 
351 template <typename T> using vec_data = detail::vec_helper<T>;
352 
353 template <typename T>
355 
361 template <typename Type, int NumElements> class vec {
362  using DataT = Type;
363 
364  // This represent type of underlying value. There should be only one field
365  // in the class, so vec<float, 16> should be equal to float16 in memory.
366  using DataType = typename detail::VecStorage<DataT, NumElements>::DataType;
367 
368 #ifdef __SYCL_DEVICE_ONLY__
369  static constexpr bool IsHostHalf = false;
370 #else
371  static constexpr bool IsHostHalf =
372  std::is_same_v<DataT, sycl::detail::half_impl::half>;
373 #endif
374 
375  static constexpr bool IsBfloat16 =
376  std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;
377 
378  static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements;
379  static constexpr size_t Sz = sizeof(DataT) * AdjustedNum;
380  static constexpr bool IsSizeGreaterThanMaxAlign =
382 
383  // TODO: There is no support for vector half type on host yet.
384  // Also, when Sz is greater than alignment, we use std::array instead of
385  // vector extension. This is for MSVC compatibility, which has a max alignment
386  // of 64 for direct params. If we drop MSVC, we can have alignment the same as
387  // size and use vector extensions for all sizes.
388  static constexpr bool IsUsingArrayOnDevice =
389  (IsHostHalf || IsBfloat16 || IsSizeGreaterThanMaxAlign);
390 
391 #if defined(__SYCL_DEVICE_ONLY__)
392  static constexpr bool NativeVec = NumElements > 1 && !IsUsingArrayOnDevice;
393  static constexpr bool IsUsingArrayOnHost = false; // not compiling for host.
394 #else
395  static constexpr bool NativeVec = false;
396  static constexpr bool IsUsingArrayOnHost = true; // host always std::array.
397 #endif
398 
399  static constexpr int getNumElements() { return NumElements; }
400 
401  // SizeChecker is needed for vec(const argTN &... args) ctor to validate args.
402  template <int Counter, int MaxValue, class...>
403  struct SizeChecker : std::conditional_t<Counter == MaxValue, std::true_type,
404  std::false_type> {};
405 
406  template <int Counter, int MaxValue, typename DataT_, class... tail>
407  struct SizeChecker<Counter, MaxValue, DataT_, tail...>
408  : std::conditional_t<Counter + 1 <= MaxValue,
409  SizeChecker<Counter + 1, MaxValue, tail...>,
410  std::false_type> {};
411 
412  // Utility trait for creating an std::array from an vector argument.
413  template <typename DataT_, typename T, std::size_t... Is>
414  static constexpr std::array<DataT_, sizeof...(Is)>
415  VecToArray(const vec<T, sizeof...(Is)> &V, std::index_sequence<Is...>) {
416  return {static_cast<DataT_>(V.getValue(Is))...};
417  }
418  template <typename DataT_, typename T, int N, typename T2, typename T3,
419  template <typename> class T4, int... T5, std::size_t... Is>
420  static constexpr std::array<DataT_, sizeof...(Is)>
421  VecToArray(const detail::SwizzleOp<vec<T, N>, T2, T3, T4, T5...> &V,
422  std::index_sequence<Is...>) {
423  return {static_cast<DataT_>(V.getValue(Is))...};
424  }
425  template <typename DataT_, typename T, int N, typename T2, typename T3,
426  template <typename> class T4, int... T5, std::size_t... Is>
427  static constexpr std::array<DataT_, sizeof...(Is)>
428  VecToArray(const detail::SwizzleOp<const vec<T, N>, T2, T3, T4, T5...> &V,
429  std::index_sequence<Is...>) {
430  return {static_cast<DataT_>(V.getValue(Is))...};
431  }
432  template <typename DataT_, typename T, int N>
433  static constexpr std::array<DataT_, N>
434  FlattenVecArgHelper(const vec<T, N> &A) {
435  return VecToArray<DataT_>(A, std::make_index_sequence<N>());
436  }
437  template <typename DataT_, typename T, int N, typename T2, typename T3,
438  template <typename> class T4, int... T5>
439  static constexpr std::array<DataT_, sizeof...(T5)> FlattenVecArgHelper(
440  const detail::SwizzleOp<vec<T, N>, T2, T3, T4, T5...> &A) {
441  return VecToArray<DataT_>(A, std::make_index_sequence<sizeof...(T5)>());
442  }
443  template <typename DataT_, typename T, int N, typename T2, typename T3,
444  template <typename> class T4, int... T5>
445  static constexpr std::array<DataT_, sizeof...(T5)> FlattenVecArgHelper(
446  const detail::SwizzleOp<const vec<T, N>, T2, T3, T4, T5...> &A) {
447  return VecToArray<DataT_>(A, std::make_index_sequence<sizeof...(T5)>());
448  }
449  template <typename DataT_, typename T>
450  static constexpr auto FlattenVecArgHelper(const T &A) {
451  return std::array<DataT_, 1>{vec_data<DataT_>::get(static_cast<DataT_>(A))};
452  }
453  template <typename DataT_, typename T> struct FlattenVecArg {
454  constexpr auto operator()(const T &A) const {
455  return FlattenVecArgHelper<DataT_>(A);
456  }
457  };
458 
459  // Alias for shortening the vec arguments to array converter.
460  template <typename DataT_, typename... ArgTN>
461  using VecArgArrayCreator =
462  detail::ArrayCreator<DataT_, FlattenVecArg, ArgTN...>;
463 
464 #define __SYCL_ALLOW_VECTOR_SIZES(num_elements) \
465  template <int Counter, int MaxValue, typename DataT_, class... tail> \
466  struct SizeChecker<Counter, MaxValue, vec<DataT_, num_elements>, tail...> \
467  : std::conditional_t< \
468  Counter + (num_elements) <= MaxValue, \
469  SizeChecker<Counter + (num_elements), MaxValue, tail...>, \
470  std::false_type> {}; \
471  template <int Counter, int MaxValue, typename DataT_, typename T2, \
472  typename T3, template <typename> class T4, int... T5, \
473  class... tail> \
474  struct SizeChecker< \
475  Counter, MaxValue, \
476  detail::SwizzleOp<vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
477  tail...> \
478  : std::conditional_t< \
479  Counter + sizeof...(T5) <= MaxValue, \
480  SizeChecker<Counter + sizeof...(T5), MaxValue, tail...>, \
481  std::false_type> {}; \
482  template <int Counter, int MaxValue, typename DataT_, typename T2, \
483  typename T3, template <typename> class T4, int... T5, \
484  class... tail> \
485  struct SizeChecker< \
486  Counter, MaxValue, \
487  detail::SwizzleOp<const vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
488  tail...> \
489  : std::conditional_t< \
490  Counter + sizeof...(T5) <= MaxValue, \
491  SizeChecker<Counter + sizeof...(T5), MaxValue, tail...>, \
492  std::false_type> {};
493 
494  __SYCL_ALLOW_VECTOR_SIZES(1)
495  __SYCL_ALLOW_VECTOR_SIZES(2)
496  __SYCL_ALLOW_VECTOR_SIZES(3)
497  __SYCL_ALLOW_VECTOR_SIZES(4)
498  __SYCL_ALLOW_VECTOR_SIZES(8)
499  __SYCL_ALLOW_VECTOR_SIZES(16)
500 #undef __SYCL_ALLOW_VECTOR_SIZES
501 
502  // TypeChecker is needed for vec(const argTN &... args) ctor to validate args.
503  template <typename T, typename DataT_>
504  struct TypeChecker : std::is_convertible<T, DataT_> {};
505 #define __SYCL_ALLOW_VECTOR_TYPES(num_elements) \
506  template <typename DataT_> \
507  struct TypeChecker<vec<DataT_, num_elements>, DataT_> : std::true_type {}; \
508  template <typename DataT_, typename T2, typename T3, \
509  template <typename> class T4, int... T5> \
510  struct TypeChecker< \
511  detail::SwizzleOp<vec<DataT_, num_elements>, T2, T3, T4, T5...>, DataT_> \
512  : std::true_type {}; \
513  template <typename DataT_, typename T2, typename T3, \
514  template <typename> class T4, int... T5> \
515  struct TypeChecker< \
516  detail::SwizzleOp<const vec<DataT_, num_elements>, T2, T3, T4, T5...>, \
517  DataT_> : std::true_type {};
518 
519  __SYCL_ALLOW_VECTOR_TYPES(1)
520  __SYCL_ALLOW_VECTOR_TYPES(2)
521  __SYCL_ALLOW_VECTOR_TYPES(3)
522  __SYCL_ALLOW_VECTOR_TYPES(4)
523  __SYCL_ALLOW_VECTOR_TYPES(8)
524  __SYCL_ALLOW_VECTOR_TYPES(16)
525 #undef __SYCL_ALLOW_VECTOR_TYPES
526 
527  template <int... Indexes>
528  using Swizzle =
529  detail::SwizzleOp<vec, detail::GetOp<DataT>, detail::GetOp<DataT>,
530  detail::GetOp, Indexes...>;
531 
532  template <int... Indexes>
533  using ConstSwizzle =
534  detail::SwizzleOp<const vec, detail::GetOp<DataT>, detail::GetOp<DataT>,
535  detail::GetOp, Indexes...>;
536 
537  // Shortcuts for args validation in vec(const argTN &... args) ctor.
538  template <typename... argTN>
539  using EnableIfSuitableTypes = typename std::enable_if_t<
540  std::conjunction_v<TypeChecker<argTN, DataT>...>>;
541 
542  template <typename... argTN>
543  using EnableIfSuitableNumElements =
544  typename std::enable_if_t<SizeChecker<0, NumElements, argTN...>::value>;
545 
546  template <size_t... Is>
547  constexpr vec(const std::array<vec_data_t<DataT>, NumElements> &Arr,
548  std::index_sequence<Is...>)
549  : m_Data{([&](vec_data_t<DataT> v) constexpr {
550  if constexpr (std::is_same_v<sycl::ext::oneapi::bfloat16, DataT>)
551  return v.value;
552  else
553  return vec_data_t<DataT>(static_cast<DataT>(v));
554  })(Arr[Is])...} {}
555 
556 public:
557  using element_type = DataT;
558  using value_type = DataT;
559  using rel_t = detail::rel_t<DataT>;
560 #ifdef __SYCL_DEVICE_ONLY__
561  using vector_t =
562  typename detail::VecStorage<DataT, NumElements>::VectorDataType;
563 #endif // __SYCL_DEVICE_ONLY__
564 
565  vec() = default;
566 
567  constexpr vec(const vec &Rhs) = default;
568  constexpr vec(vec &&Rhs) = default;
569 
570  constexpr vec &operator=(const vec &Rhs) = default;
571 
572  // W/o this, things like "vec<char,*> = vec<signed char, *>" doesn't work.
573  template <typename Ty = DataT>
574  typename std::enable_if_t<!std::is_same_v<Ty, rel_t> &&
575  std::is_convertible_v<vec_data_t<Ty>, rel_t>,
576  vec &>
577  operator=(const vec<rel_t, NumElements> &Rhs) {
578  *this = Rhs.template as<vec>();
579  return *this;
580  }
581 
582 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
583  template <typename T = void>
584  using EnableIfNotHostHalf = typename std::enable_if_t<!IsHostHalf, T>;
585 
586  template <typename T = void>
587  using EnableIfHostHalf = typename std::enable_if_t<IsHostHalf, T>;
588 
589  template <typename T = void>
590  using EnableIfUsingArrayOnDevice =
591  typename std::enable_if_t<IsUsingArrayOnDevice, T>;
592 
593  template <typename T = void>
594  using EnableIfNotUsingArrayOnDevice =
595  typename std::enable_if_t<!IsUsingArrayOnDevice, T>;
596 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
597 
598  template <typename T = void>
599  using EnableIfUsingArray =
600  typename std::enable_if_t<IsUsingArrayOnDevice || IsUsingArrayOnHost, T>;
601 
602  template <typename T = void>
603  using EnableIfNotUsingArray =
604  typename std::enable_if_t<!IsUsingArrayOnDevice && !IsUsingArrayOnHost,
605  T>;
606 
607 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
608 
609  template <typename Ty = DataT>
610  explicit constexpr vec(const EnableIfNotUsingArrayOnDevice<Ty> &arg)
611  : m_Data{DataType(vec_data<Ty>::get(arg))} {}
612 
613  template <typename Ty = DataT>
614  typename std::enable_if_t<
615  std::is_fundamental_v<vec_data_t<Ty>> ||
616  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
617  vec &>
618  operator=(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) {
619  m_Data = (DataType)vec_data<Ty>::get(Rhs);
620  return *this;
621  }
622 
623  template <typename Ty = DataT>
624  explicit constexpr vec(const EnableIfUsingArrayOnDevice<Ty> &arg)
625  : vec{detail::RepeatValue<NumElements>(
626  static_cast<vec_data_t<DataT>>(arg)),
627  std::make_index_sequence<NumElements>()} {}
628 
629  template <typename Ty = DataT>
630  typename std::enable_if_t<
631  std::is_fundamental_v<vec_data_t<Ty>> ||
632  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
633  vec &>
634  operator=(const EnableIfUsingArrayOnDevice<Ty> &Rhs) {
635  for (int i = 0; i < NumElements; ++i) {
636  setValue(i, Rhs);
637  }
638  return *this;
639  }
640 #else // __SYCL_USE_EXT_VECTOR_TYPE__
641  explicit constexpr vec(const DataT &arg)
642  : vec{detail::RepeatValue<NumElements>(
643  static_cast<vec_data_t<DataT>>(arg)),
644  std::make_index_sequence<NumElements>()} {}
645 
646  template <typename Ty = DataT>
647  typename std::enable_if_t<
648  std::is_fundamental_v<vec_data_t<Ty>> ||
649  detail::is_half_or_bf16_v<typename std::remove_const_t<Ty>>,
650  vec &>
651  operator=(const DataT &Rhs) {
652  for (int i = 0; i < NumElements; ++i) {
653  setValue(i, Rhs);
654  }
655  return *this;
656  }
657 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
658 
659 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
660  // Optimized naive constructors with NumElements of DataT values.
661  // We don't expect compilers to optimize vararg recursive functions well.
662 
663  // Helper type to make specific constructors available only for specific
664  // number of elements.
665  template <int IdxNum, typename T = void>
666  using EnableIfMultipleElems = typename std::enable_if_t<
667  std::is_convertible_v<T, DataT> && NumElements == IdxNum, DataT>;
668  template <typename Ty = DataT>
669  constexpr vec(const EnableIfMultipleElems<2, Ty> Arg0,
670  const EnableIfNotUsingArrayOnDevice<Ty> Arg1)
671  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1)} {}
672  template <typename Ty = DataT>
673  constexpr vec(const EnableIfMultipleElems<3, Ty> Arg0,
674  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2)
675  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
676  vec_data<Ty>::get(Arg2)} {}
677  template <typename Ty = DataT>
678  constexpr vec(const EnableIfMultipleElems<4, Ty> Arg0,
679  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
680  const Ty Arg3)
681  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
682  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3)} {}
683  template <typename Ty = DataT>
684  constexpr vec(const EnableIfMultipleElems<8, Ty> Arg0,
685  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
686  const DataT Arg3, const DataT Arg4, const DataT Arg5,
687  const DataT Arg6, const DataT Arg7)
688  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
689  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3),
690  vec_data<Ty>::get(Arg4), vec_data<Ty>::get(Arg5),
691  vec_data<Ty>::get(Arg6), vec_data<Ty>::get(Arg7)} {}
692  template <typename Ty = DataT>
693  constexpr vec(const EnableIfMultipleElems<16, Ty> Arg0,
694  const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
695  const DataT Arg3, const DataT Arg4, const DataT Arg5,
696  const DataT Arg6, const DataT Arg7, const DataT Arg8,
697  const DataT Arg9, const DataT ArgA, const DataT ArgB,
698  const DataT ArgC, const DataT ArgD, const DataT ArgE,
699  const DataT ArgF)
700  : m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
701  vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3),
702  vec_data<Ty>::get(Arg4), vec_data<Ty>::get(Arg5),
703  vec_data<Ty>::get(Arg6), vec_data<Ty>::get(Arg7),
704  vec_data<Ty>::get(Arg8), vec_data<Ty>::get(Arg9),
705  vec_data<Ty>::get(ArgA), vec_data<Ty>::get(ArgB),
706  vec_data<Ty>::get(ArgC), vec_data<Ty>::get(ArgD),
707  vec_data<Ty>::get(ArgE), vec_data<Ty>::get(ArgF)} {}
708 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
709 
710  // Constructor from values of base type or vec of base type. Checks that
711  // base types are match and that the NumElements == sum of lengths of args.
712  template <typename... argTN, typename = EnableIfSuitableTypes<argTN...>,
713  typename = EnableIfSuitableNumElements<argTN...>>
714  constexpr vec(const argTN &...args)
715  : vec{VecArgArrayCreator<vec_data_t<DataT>, argTN...>::Create(args...),
716  std::make_index_sequence<NumElements>()} {}
717 
718 #ifdef __SYCL_DEVICE_ONLY__
719  template <typename vector_t_ = vector_t,
720  typename =
721  typename std::enable_if_t<std::is_same_v<vector_t_, vector_t> &&
722  !std::is_same_v<vector_t_, DataT>>>
723  constexpr vec(vector_t openclVector) {
724  if constexpr (!IsUsingArrayOnDevice) {
725  m_Data = openclVector;
726  } else {
727  m_Data = bit_cast<DataType>(openclVector);
728  }
729  }
730 
731  operator vector_t() const {
732  if constexpr (!IsUsingArrayOnDevice) {
733  return m_Data;
734  } else {
735  auto ptr = bit_cast<const vector_t *>((&m_Data)->data());
736  return *ptr;
737  }
738  }
739 #endif // __SYCL_DEVICE_ONLY__
740 
741  // Available only when: NumElements == 1
742  template <int N = NumElements>
743  operator typename std::enable_if_t<N == 1, DataT>() const {
744  return vec_data<DataT>::get(m_Data);
745  }
746 
747  __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
748  static constexpr size_t get_count() { return size(); }
749  static constexpr size_t size() noexcept { return NumElements; }
750  __SYCL2020_DEPRECATED(
751  "get_size() is deprecated, please use byte_size() instead")
752  static constexpr size_t get_size() { return byte_size(); }
753  static constexpr size_t byte_size() noexcept { return sizeof(m_Data); }
754 
755  // convertImpl can't be called with the same From and To types and therefore
756  // we need this version of convert which is mostly no-op.
757  template <typename convertT,
758  rounding_mode roundingMode = rounding_mode::automatic>
759  std::enable_if_t<
760  std::is_same_v<vec_data_t<DataT>, vec_data_t<convertT>> ||
761  std::is_same_v<detail::ConvertToOpenCLType_t<vec_data_t<DataT>>,
762  detail::ConvertToOpenCLType_t<vec_data_t<convertT>>>,
763  vec<convertT, NumElements>>
764  convert() const {
765  static_assert(std::is_integral_v<vec_data_t<convertT>> ||
766  detail::is_floating_point<convertT>::value,
767  "Unsupported convertT");
768  if constexpr (!std::is_same_v<DataT, convertT>) {
769  // Dummy conversion for cases like vec<signed char> -> vec<char>
770  vec<convertT, NumElements> Result;
771  for (size_t I = 0; I < NumElements; ++I)
772  Result.setValue(I, static_cast<convertT>(getValue(I)));
773 
774  return Result;
775  } else {
776  // No conversion necessary
777  return *this;
778  }
779  }
780 
781  template <typename convertT,
782  rounding_mode roundingMode = rounding_mode::automatic>
783  std::enable_if_t<
784  !std::is_same_v<vec_data_t<DataT>, vec_data_t<convertT>> &&
785  !std::is_same_v<detail::ConvertToOpenCLType_t<vec_data_t<DataT>>,
786  detail::ConvertToOpenCLType_t<vec_data_t<convertT>>>,
787  vec<convertT, NumElements>>
788  convert() const {
789  static_assert(std::is_integral_v<vec_data_t<convertT>> ||
790  detail::is_floating_point<convertT>::value,
791  "Unsupported convertT");
792  using T = vec_data_t<DataT>;
793  using R = vec_data_t<convertT>;
794  using OpenCLT = detail::ConvertToOpenCLType_t<T>;
795  using OpenCLR = detail::ConvertToOpenCLType_t<R>;
796  vec<convertT, NumElements> Result;
797 
798 #if defined(__SYCL_DEVICE_ONLY__)
799  using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
800  using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
801  // Whole vector conversion can only be done, if:
802  constexpr bool canUseNativeVectorConvert =
803 #ifdef __NVPTX__
804  // - we are not on CUDA, see intel/llvm#11840
805  false &&
806 #endif
807  // - both vectors are represented using native vector types;
808  NativeVec && vec<convertT, NumElements>::NativeVec &&
809  // - vec storage has an equivalent OpenCL native vector it is implicitly
810  // convertible to. There are some corner cases where it is not the
811  // case with char, long and long long types.
812  std::is_convertible_v<decltype(m_Data), OpenCLVecT> &&
813  std::is_convertible_v<decltype(Result.m_Data), OpenCLVecR> &&
814  // - it is not a signed to unsigned (or vice versa) conversion
815  // see comments within 'convertImpl' for more details;
816  !detail::is_sint_to_from_uint<T, R>::value &&
817  // - destination type is not bool. bool is stored as integer under the
818  // hood and therefore conversion to bool looks like conversion between
819  // two integer types. Since bit pattern for true and false is not
820  // defined, there is no guarantee that integer conversion yields
821  // right results here;
822  !std::is_same_v<convertT, bool>;
823  if constexpr (canUseNativeVectorConvert) {
824  Result.m_Data = detail::convertImpl<T, R, roundingMode, NumElements,
825  OpenCLVecT, OpenCLVecR>(m_Data);
826  } else
827 #endif // defined(__SYCL_DEVICE_ONLY__)
828  {
829  // Otherwise, we fallback to per-element conversion:
830  for (size_t I = 0; I < NumElements; ++I) {
831  Result.setValue(
832  I, vec_data<convertT>::get(
833  detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
834  vec_data<DataT>::get(getValue(I)))));
835  }
836  }
837 
838  return Result;
839  }
840 
841  template <typename asT> asT as() const {
842  static_assert((sizeof(*this) == sizeof(asT)),
843  "The new SYCL vec type must have the same storage size in "
844  "bytes as this SYCL vec");
845  static_assert(
846  detail::is_contained<asT, detail::gtl::vector_basic_list>::value ||
847  detail::is_contained<asT, detail::gtl::vector_bool_list>::value,
848  "asT must be SYCL vec of a different element type and "
849  "number of elements specified by asT");
850  asT Result;
851  detail::memcpy(&Result.m_Data, &m_Data, sizeof(decltype(Result.m_Data)));
852  return Result;
853  }
854 
855  template <int... SwizzleIndexes> Swizzle<SwizzleIndexes...> swizzle() {
856  return this;
857  }
858 
859  template <int... SwizzleIndexes>
860  ConstSwizzle<SwizzleIndexes...> swizzle() const {
861  return this;
862  }
863 
864  // ext_vector_type is used as an underlying type for sycl::vec on device.
865  // The problem is that for clang vector types the return of operator[] is a
866  // temporary and not a reference to the element in the vector. In practice
867  // reinterpret_cast<DataT *>(&m_Data)[i]; is working. According to
868  // http://llvm.org/docs/GetElementPtr.html#can-gep-index-into-vector-elements
869  // this is not disallowed now. But could probably be disallowed in the future.
870  // That is why tests are added to check that behavior of the compiler has
871  // not changed.
872  //
873  // Implement operator [] in the same way for host and device.
874  // TODO: change host side implementation when underlying type for host side
875  // will be changed to std::array.
876  // NOTE: aliasing the incompatible types of bfloat16 may lead to problems if
877  // aggressively optimized. Specializing with noinline to avoid as workaround.
878 
879  template <typename T = DataT>
880  typename std::enable_if_t<!std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
881  const DataT &>
882  operator[](int i) const {
883  return reinterpret_cast<const DataT *>(&m_Data)[i];
884  }
885 
886  template <typename T = DataT>
887  typename std::enable_if_t<!std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
888  DataT &>
889  operator[](int i) {
890  return reinterpret_cast<DataT *>(&m_Data)[i];
891  }
892 
893 #ifdef _MSC_VER
894 #define __SYCL_NOINLINE_BF16 __declspec(noinline)
895 #else
896 #define __SYCL_NOINLINE_BF16 __attribute__((noinline))
897 #endif
898 
899  template <typename T = DataT>
900  __SYCL_NOINLINE_BF16
901  typename std::enable_if_t<std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
902  const DataT &>
903  operator[](int i) const {
904  return reinterpret_cast<const DataT *>(&m_Data)[i];
905  }
906 
907  template <typename T = DataT>
908  __SYCL_NOINLINE_BF16
909  typename std::enable_if_t<std::is_same_v<T, sycl::ext::oneapi::bfloat16>,
910  DataT &>
911  operator[](int i) {
912  return reinterpret_cast<DataT *>(&m_Data)[i];
913  }
914 
915 #undef __SYCL_NOINLINE_BF16
916 
917  // Begin hi/lo, even/odd, xyzw, and rgba swizzles.
918 private:
919  // Indexer used in the swizzles.def
920  // Currently it is defined as a template struct. Replacing it with a constexpr
921  // function would activate a bug in MSVC that is fixed only in v19.20.
922  // Until then MSVC does not recognize such constexpr functions as const and
923  // thus does not let using them in template parameters inside swizzle.def.
924  template <int Index> struct Indexer {
925  static constexpr int value = Index;
926  };
927 
928 public:
929 #ifdef __SYCL_ACCESS_RETURN
930 #error "Undefine __SYCL_ACCESS_RETURN macro"
931 #endif
932 #define __SYCL_ACCESS_RETURN this
933 #include "swizzles.def"
934 #undef __SYCL_ACCESS_RETURN
935  // End of hi/lo, even/odd, xyzw, and rgba swizzles.
936 
937  template <access::address_space Space, access::decorated DecorateAddress>
938  void load(size_t Offset, multi_ptr<const DataT, Space, DecorateAddress> Ptr) {
939  for (int I = 0; I < NumElements; I++) {
940  setValue(I, *multi_ptr<const DataT, Space, DecorateAddress>(
941  Ptr + Offset * NumElements + I));
942  }
943  }
944  template <access::address_space Space, access::decorated DecorateAddress>
945  void load(size_t Offset, multi_ptr<DataT, Space, DecorateAddress> Ptr) {
946  multi_ptr<const DataT, Space, DecorateAddress> ConstPtr(Ptr);
947  load(Offset, ConstPtr);
948  }
949  template <int Dimensions, access::mode Mode,
950  access::placeholder IsPlaceholder, access::target Target,
951  typename PropertyListT>
952  void
953  load(size_t Offset,
954  accessor<DataT, Dimensions, Mode, Target, IsPlaceholder, PropertyListT>
955  Acc) {
956  multi_ptr<const DataT, detail::TargetToAS<Target>::AS,
957  access::decorated::yes>
958  MultiPtr(Acc);
959  load(Offset, MultiPtr);
960  }
961  template <access::address_space Space, access::decorated DecorateAddress>
962  void store(size_t Offset,
963  multi_ptr<DataT, Space, DecorateAddress> Ptr) const {
964  for (int I = 0; I < NumElements; I++) {
965  *multi_ptr<DataT, Space, DecorateAddress>(Ptr + Offset * NumElements +
966  I) = getValue(I);
967  }
968  }
969  template <int Dimensions, access::mode Mode,
970  access::placeholder IsPlaceholder, access::target Target,
971  typename PropertyListT>
972  void
973  store(size_t Offset,
974  accessor<DataT, Dimensions, Mode, Target, IsPlaceholder, PropertyListT>
975  Acc) {
976  multi_ptr<DataT, detail::TargetToAS<Target>::AS, access::decorated::yes>
977  MultiPtr(Acc);
978  store(Offset, MultiPtr);
979  }
980 
981  void ConvertToDataT() {
982  for (size_t i = 0; i < NumElements; ++i) {
983  DataT tmp = getValue(i);
984  setValue(i, tmp);
985  }
986  }
987 
988 #ifdef __SYCL_BINOP
989 #error "Undefine __SYCL_BINOP macro"
990 #endif
991 
992 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
993 #define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \
994  friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \
995  vec Ret; \
996  if constexpr (IsUsingArrayOnDevice) { \
997  for (size_t I = 0; I < NumElements; ++I) { \
998  Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \
999  } \
1000  } else { \
1001  Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \
1002  if constexpr (std::is_same_v<Type, bool> && CONVERT) { \
1003  Ret.ConvertToDataT(); \
1004  } \
1005  } \
1006  return Ret; \
1007  } \
1008  friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \
1009  return Lhs BINOP vec(Rhs); \
1010  } \
1011  friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \
1012  return vec(Lhs) BINOP Rhs; \
1013  } \
1014  friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \
1015  Lhs = Lhs BINOP Rhs; \
1016  return Lhs; \
1017  } \
1018  template <int Num = NumElements> \
1019  friend typename std::enable_if_t<Num != 1, vec &> operator OPASSIGN( \
1020  vec & Lhs, const DataT & Rhs) { \
1021  Lhs = Lhs BINOP vec(Rhs); \
1022  return Lhs; \
1023  }
1024 
1025 #else // __SYCL_USE_EXT_VECTOR_TYPE__
1026 
1027 #define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \
1028  friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \
1029  vec Ret{}; \
1030  if constexpr (NativeVec) \
1031  Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \
1032  else \
1033  for (size_t I = 0; I < NumElements; ++I) \
1034  Ret.setValue(I, (DataT)(vec_data<DataT>::get(Lhs.getValue( \
1035  I)) BINOP vec_data<DataT>::get(Rhs.getValue(I)))); \
1036  return Ret; \
1037  } \
1038  friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \
1039  return Lhs BINOP vec(Rhs); \
1040  } \
1041  friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \
1042  return vec(Lhs) BINOP Rhs; \
1043  } \
1044  friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \
1045  Lhs = Lhs BINOP Rhs; \
1046  return Lhs; \
1047  } \
1048  template <int Num = NumElements> \
1049  friend typename std::enable_if_t<Num != 1, vec &> operator OPASSIGN( \
1050  vec & Lhs, const DataT & Rhs) { \
1051  Lhs = Lhs BINOP vec(Rhs); \
1052  return Lhs; \
1053  }
1054 
1055 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
1056 
1057  __SYCL_BINOP(+, +=, true)
1058  __SYCL_BINOP(-, -=, true)
1059  __SYCL_BINOP(*, *=, false)
1060  __SYCL_BINOP(/, /=, false)
1061 
1062  // TODO: The following OPs are available only when: DataT != cl_float &&
1063  // DataT != cl_double && DataT != cl_half
1064  __SYCL_BINOP(%, %=, false)
1065  __SYCL_BINOP(|, |=, false)
1066  __SYCL_BINOP(&, &=, false)
1067  __SYCL_BINOP(^, ^=, false)
1068  __SYCL_BINOP(>>, >>=, false)
1069  __SYCL_BINOP(<<, <<=, true)
1070 #undef __SYCL_BINOP
1071 #undef __SYCL_BINOP_HELP
1072 
1073  // Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic.
1074  // As far as CTS validation is concerned, 0/-1 logic also applies when
1075  // NumElements is equal to one, which is somewhat inconsistent with being
1076  // transparent with scalar data.
1077  // TODO: Determine if vec<, NumElements=1> is needed at all, remove this
1078  // inconsistency if not by disallowing one-element vectors (as in OpenCL)
1079 
1080 #ifdef __SYCL_RELLOGOP
1081 #error "Undefine __SYCL_RELLOGOP macro"
1082 #endif
1083 // Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
1084 // by SYCL device compiler only.
1085 #ifdef __SYCL_DEVICE_ONLY__
1086 #define __SYCL_RELLOGOP(RELLOGOP) \
1087  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1088  const vec & Rhs) { \
1089  vec<rel_t, NumElements> Ret{}; \
1090  /* This special case is needed since there are no standard operator|| */ \
1091  /* or operator&& functions for std::array. */ \
1092  if constexpr (IsUsingArrayOnDevice && \
1093  (std::string_view(#RELLOGOP) == "||" || \
1094  std::string_view(#RELLOGOP) == "&&")) { \
1095  for (size_t I = 0; I < NumElements; ++I) { \
1096  /* We cannot use SetValue here as the operator is not a friend of*/ \
1097  /* Ret on Windows. */ \
1098  Ret[I] = static_cast<rel_t>(-(vec_data<DataT>::get( \
1099  Lhs.getValue(I)) RELLOGOP vec_data<DataT>::get(Rhs.getValue(I)))); \
1100  } \
1101  } else { \
1102  Ret = vec<rel_t, NumElements>( \
1103  (typename vec<rel_t, NumElements>::vector_t)( \
1104  Lhs.m_Data RELLOGOP Rhs.m_Data)); \
1105  if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \
1106  Ret *= -1; \
1107  } \
1108  return Ret; \
1109  } \
1110  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1111  const DataT & Rhs) { \
1112  return Lhs RELLOGOP vec(Rhs); \
1113  } \
1114  friend vec<rel_t, NumElements> operator RELLOGOP(const DataT & Lhs, \
1115  const vec & Rhs) { \
1116  return vec(Lhs) RELLOGOP Rhs; \
1117  }
1118 
1119 #else
1120 #define __SYCL_RELLOGOP(RELLOGOP) \
1121  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1122  const vec & Rhs) { \
1123  vec<rel_t, NumElements> Ret{}; \
1124  for (size_t I = 0; I < NumElements; ++I) { \
1125  /* We cannot use SetValue here as the operator is not a friend of*/ \
1126  /* Ret on Windows. */ \
1127  Ret[I] = static_cast<rel_t>(-(vec_data<DataT>::get( \
1128  Lhs.getValue(I)) RELLOGOP vec_data<DataT>::get(Rhs.getValue(I)))); \
1129  } \
1130  return Ret; \
1131  } \
1132  friend vec<rel_t, NumElements> operator RELLOGOP(const vec & Lhs, \
1133  const DataT & Rhs) { \
1134  return Lhs RELLOGOP vec(Rhs); \
1135  } \
1136  friend vec<rel_t, NumElements> operator RELLOGOP(const DataT & Lhs, \
1137  const vec & Rhs) { \
1138  return vec(Lhs) RELLOGOP Rhs; \
1139  }
1140 #endif
1141 
1142  __SYCL_RELLOGOP(==)
1143  __SYCL_RELLOGOP(!=)
1144  __SYCL_RELLOGOP(>)
1145  __SYCL_RELLOGOP(<)
1146  __SYCL_RELLOGOP(>=)
1147  __SYCL_RELLOGOP(<=)
1148  // TODO: limit to integral types.
1149  __SYCL_RELLOGOP(&&)
1150  __SYCL_RELLOGOP(||)
1151 #undef __SYCL_RELLOGOP
1152 
1153 #ifdef __SYCL_UOP
1154 #error "Undefine __SYCL_UOP macro"
1155 #endif
1156 #define __SYCL_UOP(UOP, OPASSIGN) \
1157  friend vec &operator UOP(vec & Rhs) { \
1158  Rhs OPASSIGN vec_data<DataT>::get(1); \
1159  return Rhs; \
1160  } \
1161  friend vec operator UOP(vec &Lhs, int) { \
1162  vec Ret(Lhs); \
1163  Lhs OPASSIGN vec_data<DataT>::get(1); \
1164  return Ret; \
1165  }
1166 
1167  __SYCL_UOP(++, +=)
1168  __SYCL_UOP(--, -=)
1169 #undef __SYCL_UOP
1170 
1171  // operator~() available only when: dataT != float && dataT != double
1172  // && dataT != half
1173  friend vec operator~(const vec &Rhs) {
1174  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1175  vec Ret{};
1176  for (size_t I = 0; I < NumElements; ++I) {
1177  Ret.setValue(I, ~Rhs.getValue(I));
1178  }
1179  return Ret;
1180  } else {
1181  vec Ret{(typename vec::DataType) ~Rhs.m_Data};
1182  if constexpr (std::is_same_v<Type, bool>) {
1183  Ret.ConvertToDataT();
1184  }
1185  return Ret;
1186  }
1187  }
1188 
1189  // operator!
1190  friend vec<detail::rel_t<DataT>, NumElements> operator!(const vec &Rhs) {
1191  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1192  vec Ret{};
1193  for (size_t I = 0; I < NumElements; ++I) {
1194 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1195  // std::byte neither supports ! unary op or casting, so special handling
1196  // is needed. And, worse, Windows has a conflict with 'byte'.
1197  if constexpr (std::is_same_v<std::byte, DataT>) {
1198  Ret.setValue(I, std::byte{!vec_data<DataT>::get(Rhs.getValue(I))});
1199  } else
1200 #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1201  {
1202  Ret.setValue(I, !vec_data<DataT>::get(Rhs.getValue(I)));
1203  }
1204  }
1205  return Ret.template as<vec<detail::rel_t<DataT>, NumElements>>();
1206  } else {
1207  return vec{(typename vec<DataT, NumElements>::DataType) !Rhs.m_Data}
1208  .template as<vec<detail::rel_t<DataT>, NumElements>>();
1209  }
1210  }
1211 
1212  // operator +
1213  friend vec operator+(const vec &Lhs) {
1214  if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1215  vec Ret{};
1216  for (size_t I = 0; I < NumElements; ++I)
1217  Ret.setValue(
1218  I, vec_data<DataT>::get(+vec_data<DataT>::get(Lhs.getValue(I))));
1219  return Ret;
1220  } else {
1221  return vec{+Lhs.m_Data};
1222  }
1223  }
1224 
1225  // operator -
1226  friend vec operator-(const vec &Lhs) {
1227  namespace oneapi = sycl::ext::oneapi;
1228  vec Ret{};
1229  if constexpr (IsBfloat16 && NumElements == 1) {
1230  oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data);
1231  oneapi::bfloat16 w = -v;
1232  Ret.m_Data = oneapi::detail::bfloat16ToBits(w);
1233  } else if constexpr (IsBfloat16) {
1234  for (size_t I = 0; I < NumElements; I++) {
1235  oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]);
1236  oneapi::bfloat16 w = -v;
1237  Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w);
1238  }
1239  } else if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) {
1240  for (size_t I = 0; I < NumElements; ++I)
1241  Ret.setValue(
1242  I, vec_data<DataT>::get(-vec_data<DataT>::get(Lhs.getValue(I))));
1243  return Ret;
1244  } else {
1245  Ret = vec{-Lhs.m_Data};
1246  if constexpr (std::is_same_v<Type, bool>) {
1247  Ret.ConvertToDataT();
1248  }
1249  return Ret;
1250  }
1251  }
1252 
1253  // OP is: &&, ||
1254  // vec<RET, NumElements> operatorOP(const vec<DataT, NumElements> &Rhs) const;
1255  // vec<RET, NumElements> operatorOP(const DataT &Rhs) const;
1256 
1257  // OP is: ==, !=, <, >, <=, >=
1258  // vec<RET, NumElements> operatorOP(const vec<DataT, NumElements> &Rhs) const;
1259  // vec<RET, NumElements> operatorOP(const DataT &Rhs) const;
1260 private:
1261  // Generic method that execute "Operation" on underlying values.
1262 
1263 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
1264  template <template <typename> class Operation,
1265  typename Ty = vec<DataT, NumElements>>
1266  vec<DataT, NumElements>
1267  operatorHelper(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) const {
1268  vec<DataT, NumElements> Result;
1269  Operation<DataType> Op;
1270  Result.m_Data = Op(m_Data, Rhs.m_Data);
1271  return Result;
1272  }
1273 
1274  template <template <typename> class Operation,
1275  typename Ty = vec<DataT, NumElements>>
1276  vec<DataT, NumElements>
1277  operatorHelper(const EnableIfUsingArrayOnDevice<Ty> &Rhs) const {
1278  vec<DataT, NumElements> Result;
1279  Operation<DataT> Op;
1280  for (size_t I = 0; I < NumElements; ++I) {
1281  Result.setValue(I, Op(Rhs.getValue(I), getValue(I)));
1282  }
1283  return Result;
1284  }
1285 #else // __SYCL_USE_EXT_VECTOR_TYPE__
1286  template <template <typename> class Operation>
1287  vec<DataT, NumElements>
1288  operatorHelper(const vec<DataT, NumElements> &Rhs) const {
1289  vec<DataT, NumElements> Result;
1290  Operation<DataT> Op;
1291  for (size_t I = 0; I < NumElements; ++I) {
1292  Result.setValue(I, Op(Rhs.getValue(I), getValue(I)));
1293  }
1294  return Result;
1295  }
1296 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
1297 
1298  // setValue and getValue should be able to operate on different underlying
1299  // types: enum cl_float#N , builtin vector float#N, builtin type float.
1300  // These versions are for N > 1.
1301 #ifdef __SYCL_USE_EXT_VECTOR_TYPE__
1302  template <int Num = NumElements, typename Ty = int,
1303  typename = typename std::enable_if_t<1 != Num>>
1304  constexpr void setValue(EnableIfNotHostHalf<Ty> Index, const DataT &Value,
1305  int) {
1306  m_Data[Index] = vec_data<DataT>::set(Value);
1307  }
1308 
1309  template <int Num = NumElements, typename Ty = int,
1310  typename = typename std::enable_if_t<1 != Num>>
1311  constexpr DataT getValue(EnableIfNotHostHalf<Ty> Index, int) const {
1312  return vec_data<DataT>::get(m_Data[Index]);
1313  }
1314 
1315  template <int Num = NumElements, typename Ty = int,
1316  typename = typename std::enable_if_t<1 != Num>>
1317  constexpr void setValue(EnableIfHostHalf<Ty> Index, const DataT &Value, int) {
1318  m_Data.s[Index] = vec_data<DataT>::set(Value);
1319  }
1320 
1321  template <int Num = NumElements, typename Ty = int,
1322  typename = typename std::enable_if_t<1 != Num>>
1323  constexpr DataT getValue(EnableIfHostHalf<Ty> Index, int) const {
1324  return vec_data<DataT>::get(m_Data.s[Index]);
1325  }
1326 #else // __SYCL_USE_EXT_VECTOR_TYPE__
1327  template <int Num = NumElements,
1328  typename = typename std::enable_if_t<1 != Num>>
1329  constexpr void setValue(int Index, const DataT &Value, int) {
1330  m_Data[Index] = vec_data<DataT>::set(Value);
1331  }
1332 
1333  template <int Num = NumElements,
1334  typename = typename std::enable_if_t<1 != Num>>
1335  constexpr DataT getValue(int Index, int) const {
1336  return vec_data<DataT>::get(m_Data[Index]);
1337  }
1338 #endif // __SYCL_USE_EXT_VECTOR_TYPE__
1339 
1340  // N==1 versions, used by host and device. Shouldn't trailing type be int?
1341  template <int Num = NumElements,
1342  typename = typename std::enable_if_t<1 == Num>>
1343  constexpr void setValue(int, const DataT &Value, float) {
1344  m_Data = vec_data<DataT>::set(Value);
1345  }
1346 
1347  template <int Num = NumElements,
1348  typename = typename std::enable_if_t<1 == Num>>
1349  DataT getValue(int, float) const {
1350  return vec_data<DataT>::get(m_Data);
1351  }
1352 
1353  // setValue and getValue.
1354  // The "api" functions used by BINOP etc. These versions just dispatch
1355  // using additional int or float arg to disambiguate vec<1> vs. vec<N>
1356  // Special proxies as specialization is not allowed in class scope.
1357  constexpr void setValue(int Index, const DataT &Value) {
1358  if (NumElements == 1)
1359  setValue(Index, Value, 0);
1360  else
1361  setValue(Index, Value, 0.f);
1362  }
1363 
1364  DataT getValue(int Index) const {
1365  return (NumElements == 1) ? getValue(Index, 0) : getValue(Index, 0.f);
1366  }
1367 
1368  // fields
1369 
1370  // Alignment is the same as size, to a maximum size of 64.
1371  // detail::vector_alignment will return that value.
1372  alignas(detail::vector_alignment<DataT, NumElements>::value) DataType m_Data;
1373 
1374  // friends
1375  template <typename T1, typename T2, typename T3, template <typename> class T4,
1376  int... T5>
1377  friend class detail::SwizzleOp;
1378  template <typename T1, int T2> friend class vec;
1379 };
1381 
1382 #ifdef __cpp_deduction_guides
1383 // all compilers supporting deduction guides also support fold expressions
1384 template <class T, class... U,
1385  class = std::enable_if_t<(std::is_same_v<T, U> && ...)>>
1386 vec(T, U...) -> vec<T, sizeof...(U) + 1>;
1387 #endif
1388 
1389 namespace detail {
1390 
1391 // Special type for working SwizzleOp with scalars, stores a scalar and gives
1392 // the scalar at any index. Provides interface is compatible with SwizzleOp
1393 // operations
1394 template <typename T> class GetScalarOp {
1395 public:
1396  using DataT = T;
1397  GetScalarOp(DataT Data) : m_Data(Data) {}
1398  DataT getValue(size_t) const { return m_Data; }
1399 
1400 private:
1401  DataT m_Data;
1402 };
1403 
1404 template <typename T> struct EqualTo {
1405  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1406  return (Lhs == Rhs) ? -1 : 0;
1407  }
1408 };
1409 
1410 template <typename T> struct NotEqualTo {
1411  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1412  return (Lhs != Rhs) ? -1 : 0;
1413  }
1414 };
1415 
1416 template <typename T> struct GreaterEqualTo {
1417  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1418  return (Lhs >= Rhs) ? -1 : 0;
1419  }
1420 };
1421 
1422 template <typename T> struct LessEqualTo {
1423  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1424  return (Lhs <= Rhs) ? -1 : 0;
1425  }
1426 };
1427 
1428 template <typename T> struct GreaterThan {
1429  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1430  return (Lhs > Rhs) ? -1 : 0;
1431  }
1432 };
1433 
1434 template <typename T> struct LessThan {
1435  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1436  return (Lhs < Rhs) ? -1 : 0;
1437  }
1438 };
1439 
1440 template <typename T> struct LogicalAnd {
1441  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1442  return (Lhs && Rhs) ? -1 : 0;
1443  }
1444 };
1445 
1446 template <typename T> struct LogicalOr {
1447  constexpr rel_t<T> operator()(const T &Lhs, const T &Rhs) const {
1448  return (Lhs || Rhs) ? -1 : 0;
1449  }
1450 };
1451 
1452 template <typename T> struct RShift {
1453  constexpr T operator()(const T &Lhs, const T &Rhs) const {
1454  return Lhs >> Rhs;
1455  }
1456 };
1457 
1458 template <typename T> struct LShift {
1459  constexpr T operator()(const T &Lhs, const T &Rhs) const {
1460  return Lhs << Rhs;
1461  }
1462 };
1463 
1465 // SwizzleOP represents expression templates that operate on vec.
1466 // Actual computation performed on conversion or assignment operators.
1467 template <typename VecT, typename OperationLeftT, typename OperationRightT,
1468  template <typename> class OperationCurrentT, int... Indexes>
1469 class SwizzleOp {
1470  using DataT = typename VecT::element_type;
1471  // Certain operators return a vector with a different element type. Also, the
1472  // left and right operand types may differ. CommonDataT selects a result type
1473  // based on these types to ensure that the result value can be represented.
1474  //
1475  // Example 1:
1476  // sycl::vec<unsigned char, 4> vec{...};
1477  // auto result = 300u + vec.x();
1478  //
1479  // CommonDataT is std::common_type_t<OperationLeftT, OperationRightT> since
1480  // it's larger than unsigned char.
1481  //
1482  // Example 2:
1483  // sycl::vec<bool, 1> vec{...};
1484  // auto result = vec.template swizzle<sycl::elem::s0>() && vec;
1485  //
1486  // CommonDataT is DataT since operator&& returns a vector with element type
1487  // int8_t, which is larger than bool.
1488  //
1489  // Example 3:
1490  // sycl::vec<std::byte, 4> vec{...}; auto swlo = vec.lo();
1491  // auto result = swlo == swlo;
1492  //
1493  // CommonDataT is DataT since operator== returns a vector with element type
1494  // int8_t, which is the same size as std::byte. std::common_type_t<DataT, ...>
1495  // can't be used here since there's no type that int8_t and std::byte can both
1496  // be implicitly converted to.
1497  using OpLeftDataT = typename OperationLeftT::DataT;
1498  using OpRightDataT = typename OperationRightT::DataT;
1499  using CommonDataT = std::conditional_t<
1500  sizeof(DataT) >= sizeof(std::common_type_t<OpLeftDataT, OpRightDataT>),
1501  DataT, std::common_type_t<OpLeftDataT, OpRightDataT>>;
1502  static constexpr int getNumElements() { return sizeof...(Indexes); }
1503 
1504  using rel_t = detail::rel_t<DataT>;
1505  using vec_t = vec<DataT, sizeof...(Indexes)>;
1506  using vec_rel_t = vec<rel_t, sizeof...(Indexes)>;
1507 
1508  template <typename OperationRightT_,
1509  template <typename> class OperationCurrentT_, int... Idx_>
1510  using NewLHOp = SwizzleOp<VecT,
1511  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1512  OperationCurrentT, Indexes...>,
1513  OperationRightT_, OperationCurrentT_, Idx_...>;
1514 
1515  template <typename OperationRightT_,
1516  template <typename> class OperationCurrentT_, int... Idx_>
1517  using NewRelOp = SwizzleOp<vec<rel_t, VecT::getNumElements()>,
1518  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1519  OperationCurrentT, Indexes...>,
1520  OperationRightT_, OperationCurrentT_, Idx_...>;
1521 
1522  template <typename OperationLeftT_,
1523  template <typename> class OperationCurrentT_, int... Idx_>
1524  using NewRHOp = SwizzleOp<VecT, OperationLeftT_,
1525  SwizzleOp<VecT, OperationLeftT, OperationRightT,
1526  OperationCurrentT, Indexes...>,
1527  OperationCurrentT_, Idx_...>;
1528 
1529  template <int IdxNum, typename T = void>
1530  using EnableIfOneIndex = typename std::enable_if_t<
1531  1 == IdxNum && SwizzleOp::getNumElements() == IdxNum, T>;
1532 
1533  template <int IdxNum, typename T = void>
1534  using EnableIfMultipleIndexes = typename std::enable_if_t<
1535  1 != IdxNum && SwizzleOp::getNumElements() == IdxNum, T>;
1536 
1537  template <typename T>
1538  using EnableIfScalarType = typename std::enable_if_t<
1539  std::is_convertible_v<DataT, T> &&
1540  (std::is_fundamental_v<vec_data_t<T>> ||
1541  detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
1542 
1543  template <typename T>
1544  using EnableIfNoScalarType = typename std::enable_if_t<
1545  !std::is_convertible_v<DataT, T> ||
1546  !(std::is_fundamental_v<vec_data_t<T>> ||
1547  detail::is_half_or_bf16_v<typename std::remove_const_t<T>>)>;
1548 
1549  template <int... Indices>
1550  using Swizzle =
1551  SwizzleOp<VecT, GetOp<DataT>, GetOp<DataT>, GetOp, Indices...>;
1552 
1553  template <int... Indices>
1554  using ConstSwizzle =
1555  SwizzleOp<const VecT, GetOp<DataT>, GetOp<DataT>, GetOp, Indices...>;
1556 
1557 public:
1558  using element_type = DataT;
1559  using value_type = DataT;
1560 
1561 #ifdef __SYCL_DEVICE_ONLY__
1562  using vector_t = typename vec_t::vector_t;
1563 #endif // __SYCL_DEVICE_ONLY__
1564 
1565  const DataT &operator[](int i) const {
1566  std::array<int, getNumElements()> Idxs{Indexes...};
1567  return (*m_Vector)[Idxs[i]];
1568  }
1569 
1570  template <typename _T = VecT>
1571  std::enable_if_t<!std::is_const_v<_T>, DataT> &operator[](int i) {
1572  std::array<int, getNumElements()> Idxs{Indexes...};
1573  return (*m_Vector)[Idxs[i]];
1574  }
1575 
1576  __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
1577  size_t get_count() const { return size(); }
1578  static constexpr size_t size() noexcept { return getNumElements(); }
1579 
1580  template <int Num = getNumElements()>
1581  __SYCL2020_DEPRECATED(
1582  "get_size() is deprecated, please use byte_size() instead")
1583  size_t get_size() const {
1584  return byte_size<Num>();
1585  }
1586 
1587  template <int Num = getNumElements()> size_t byte_size() const noexcept {
1588  return sizeof(DataT) * (Num == 3 ? 4 : Num);
1589  }
1590 
1591  template <typename T, int IdxNum = getNumElements(),
1592  typename = EnableIfOneIndex<IdxNum>,
1593  typename = EnableIfScalarType<T>>
1594  operator T() const {
1595  return getValue(0);
1596  }
1597 
1598  template <typename T, typename = EnableIfScalarType<T>>
1599  friend NewRHOp<GetScalarOp<T>, std::multiplies, Indexes...>
1600  operator*(const T &Lhs, const SwizzleOp &Rhs) {
1601  return NewRHOp<GetScalarOp<T>, std::multiplies, Indexes...>(
1602  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1603  }
1604 
1605  template <typename T, typename = EnableIfScalarType<T>>
1606  friend NewRHOp<GetScalarOp<T>, std::plus, Indexes...>
1607  operator+(const T &Lhs, const SwizzleOp &Rhs) {
1608  return NewRHOp<GetScalarOp<T>, std::plus, Indexes...>(
1609  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1610  }
1611 
1612  template <typename T, typename = EnableIfScalarType<T>>
1613  friend NewRHOp<GetScalarOp<T>, std::divides, Indexes...>
1614  operator/(const T &Lhs, const SwizzleOp &Rhs) {
1615  return NewRHOp<GetScalarOp<T>, std::divides, Indexes...>(
1616  Rhs.m_Vector, GetScalarOp<T>(Lhs), Rhs);
1617  }
1618 
1619  // TODO: Check that Rhs arg is suitable.
1620 #ifdef __SYCL_OPASSIGN
1621 #error "Undefine __SYCL_OPASSIGN macro."
1622 #endif
1623 #define __SYCL_OPASSIGN(OPASSIGN, OP) \
1624  SwizzleOp &operator OPASSIGN(const DataT & Rhs) { \
1625  operatorHelper<OP>(vec_t(Rhs)); \
1626  return *this; \
1627  } \
1628  template <typename RhsOperation> \
1629  SwizzleOp &operator OPASSIGN(const RhsOperation & Rhs) { \
1630  operatorHelper<OP>(Rhs); \
1631  return *this; \
1632  }
1633 
1634  __SYCL_OPASSIGN(+=, std::plus)
1635  __SYCL_OPASSIGN(-=, std::minus)
1636  __SYCL_OPASSIGN(*=, std::multiplies)
1637  __SYCL_OPASSIGN(/=, std::divides)
1638  __SYCL_OPASSIGN(%=, std::modulus)
1639  __SYCL_OPASSIGN(&=, std::bit_and)
1640  __SYCL_OPASSIGN(|=, std::bit_or)
1641  __SYCL_OPASSIGN(^=, std::bit_xor)
1642  __SYCL_OPASSIGN(>>=, RShift)
1643  __SYCL_OPASSIGN(<<=, LShift)
1644 #undef __SYCL_OPASSIGN
1645 
1646 #ifdef __SYCL_UOP
1647 #error "Undefine __SYCL_UOP macro"
1648 #endif
1649 #define __SYCL_UOP(UOP, OPASSIGN) \
1650  SwizzleOp &operator UOP() { \
1651  *this OPASSIGN static_cast<DataT>(1); \
1652  return *this; \
1653  } \
1654  vec_t operator UOP(int) { \
1655  vec_t Ret = *this; \
1656  *this OPASSIGN static_cast<DataT>(1); \
1657  return Ret; \
1658  }
1659 
1660  __SYCL_UOP(++, +=)
1661  __SYCL_UOP(--, -=)
1662 #undef __SYCL_UOP
1663 
1664  template <typename T = DataT>
1665  friend typename std::enable_if_t<
1666  std::is_same_v<T, DataT> && std::is_integral_v<vec_data_t<T>>, vec_t>
1667  operator~(const SwizzleOp &Rhs) {
1668  vec_t Tmp = Rhs;
1669  return ~Tmp;
1670  }
1671 
1672  friend vec_rel_t operator!(const SwizzleOp &Rhs) {
1673  vec_t Tmp = Rhs;
1674  return !Tmp;
1675  }
1676 
1677  friend vec_t operator+(const SwizzleOp &Rhs) {
1678  vec_t Tmp = Rhs;
1679  return +Tmp;
1680  }
1681 
1682  friend vec_t operator-(const SwizzleOp &Rhs) {
1683  vec_t Tmp = Rhs;
1684  return -Tmp;
1685  }
1686 
1687 // scalar BINOP vec<>
1688 // scalar BINOP SwizzleOp
1689 // vec<> BINOP SwizzleOp
1690 #ifdef __SYCL_BINOP
1691 #error "Undefine __SYCL_BINOP macro"
1692 #endif
1693 #define __SYCL_BINOP(BINOP) \
1694  friend vec_t operator BINOP(const DataT &Lhs, const SwizzleOp &Rhs) { \
1695  vec_t Tmp = Rhs; \
1696  return Lhs BINOP Tmp; \
1697  } \
1698  friend vec_t operator BINOP(const SwizzleOp &Lhs, const DataT &Rhs) { \
1699  vec_t Tmp = Lhs; \
1700  return Tmp BINOP Rhs; \
1701  } \
1702  friend vec_t operator BINOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \
1703  vec_t Tmp = Rhs; \
1704  return Lhs BINOP Tmp; \
1705  } \
1706  friend vec_t operator BINOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \
1707  vec_t Tmp = Lhs; \
1708  return Tmp BINOP Rhs; \
1709  }
1710 
1711  __SYCL_BINOP(+)
1712  __SYCL_BINOP(-)
1713  __SYCL_BINOP(*)
1714  __SYCL_BINOP(/)
1715  __SYCL_BINOP(%)
1716  __SYCL_BINOP(&)
1717  __SYCL_BINOP(|)
1718  __SYCL_BINOP(^)
1719  __SYCL_BINOP(>>)
1720  __SYCL_BINOP(<<)
1721 #undef __SYCL_BINOP
1722 
1723 // scalar RELLOGOP vec<>
1724 // scalar RELLOGOP SwizzleOp
1725 // vec<> RELLOGOP SwizzleOp
1726 #ifdef __SYCL_RELLOGOP
1727 #error "Undefine __SYCL_RELLOGOP macro"
1728 #endif
1729 #define __SYCL_RELLOGOP(RELLOGOP) \
1730  friend vec_rel_t operator RELLOGOP(const DataT &Lhs, const SwizzleOp &Rhs) { \
1731  vec_t Tmp = Rhs; \
1732  return Lhs RELLOGOP Tmp; \
1733  } \
1734  friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const DataT &Rhs) { \
1735  vec_t Tmp = Lhs; \
1736  return Tmp RELLOGOP Rhs; \
1737  } \
1738  friend vec_rel_t operator RELLOGOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \
1739  vec_t Tmp = Rhs; \
1740  return Lhs RELLOGOP Tmp; \
1741  } \
1742  friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \
1743  vec_t Tmp = Lhs; \
1744  return Tmp RELLOGOP Rhs; \
1745  }
1746 
1747  __SYCL_RELLOGOP(==)
1748  __SYCL_RELLOGOP(!=)
1749  __SYCL_RELLOGOP(>)
1750  __SYCL_RELLOGOP(<)
1751  __SYCL_RELLOGOP(>=)
1752  __SYCL_RELLOGOP(<=)
1753  // TODO: limit to integral types.
1754  __SYCL_RELLOGOP(&&)
1755  __SYCL_RELLOGOP(||)
1756 #undef __SYCL_RELLOGOP
1757 
1758  template <int IdxNum = getNumElements(),
1759  typename = EnableIfMultipleIndexes<IdxNum>>
1760  SwizzleOp &operator=(const vec<DataT, IdxNum> &Rhs) {
1761  std::array<int, IdxNum> Idxs{Indexes...};
1762  for (size_t I = 0; I < Idxs.size(); ++I) {
1763  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1764  }
1765  return *this;
1766  }
1767 
1768  template <int IdxNum = getNumElements(), typename = EnableIfOneIndex<IdxNum>>
1769  SwizzleOp &operator=(const DataT &Rhs) {
1770  std::array<int, IdxNum> Idxs{Indexes...};
1771  m_Vector->setValue(Idxs[0], Rhs);
1772  return *this;
1773  }
1774 
1775  template <int IdxNum = getNumElements(),
1776  EnableIfMultipleIndexes<IdxNum, bool> = true>
1777  SwizzleOp &operator=(const DataT &Rhs) {
1778  std::array<int, IdxNum> Idxs{Indexes...};
1779  for (auto Idx : Idxs) {
1780  m_Vector->setValue(Idx, Rhs);
1781  }
1782  return *this;
1783  }
1784 
1785  template <int IdxNum = getNumElements(), typename = EnableIfOneIndex<IdxNum>>
1786  SwizzleOp &operator=(DataT &&Rhs) {
1787  std::array<int, IdxNum> Idxs{Indexes...};
1788  m_Vector->setValue(Idxs[0], Rhs);
1789  return *this;
1790  }
1791 
1792  template <typename T, typename = EnableIfScalarType<T>>
1793  NewLHOp<GetScalarOp<T>, std::multiplies, Indexes...>
1794  operator*(const T &Rhs) const {
1795  return NewLHOp<GetScalarOp<T>, std::multiplies, Indexes...>(
1796  m_Vector, *this, GetScalarOp<T>(Rhs));
1797  }
1798 
1799  template <typename RhsOperation,
1800  typename = EnableIfNoScalarType<RhsOperation>>
1801  NewLHOp<RhsOperation, std::multiplies, Indexes...>
1802  operator*(const RhsOperation &Rhs) const {
1803  return NewLHOp<RhsOperation, std::multiplies, Indexes...>(m_Vector, *this,
1804  Rhs);
1805  }
1806 
1807  template <typename T, typename = EnableIfScalarType<T>>
1808  NewLHOp<GetScalarOp<T>, std::plus, Indexes...> operator+(const T &Rhs) const {
1809  return NewLHOp<GetScalarOp<T>, std::plus, Indexes...>(m_Vector, *this,
1810  GetScalarOp<T>(Rhs));
1811  }
1812 
1813  template <typename RhsOperation,
1814  typename = EnableIfNoScalarType<RhsOperation>>
1815  NewLHOp<RhsOperation, std::plus, Indexes...>
1816  operator+(const RhsOperation &Rhs) const {
1817  return NewLHOp<RhsOperation, std::plus, Indexes...>(m_Vector, *this, Rhs);
1818  }
1819 
1820  template <typename T, typename = EnableIfScalarType<T>>
1821  NewLHOp<GetScalarOp<T>, std::minus, Indexes...>
1822  operator-(const T &Rhs) const {
1823  return NewLHOp<GetScalarOp<T>, std::minus, Indexes...>(m_Vector, *this,
1824  GetScalarOp<T>(Rhs));
1825  }
1826 
1827  template <typename RhsOperation,
1828  typename = EnableIfNoScalarType<RhsOperation>>
1829  NewLHOp<RhsOperation, std::minus, Indexes...>
1830  operator-(const RhsOperation &Rhs) const {
1831  return NewLHOp<RhsOperation, std::minus, Indexes...>(m_Vector, *this, Rhs);
1832  }
1833 
1834  template <typename T, typename = EnableIfScalarType<T>>
1835  NewLHOp<GetScalarOp<T>, std::divides, Indexes...>
1836  operator/(const T &Rhs) const {
1837  return NewLHOp<GetScalarOp<T>, std::divides, Indexes...>(
1838  m_Vector, *this, GetScalarOp<T>(Rhs));
1839  }
1840 
1841  template <typename RhsOperation,
1842  typename = EnableIfNoScalarType<RhsOperation>>
1843  NewLHOp<RhsOperation, std::divides, Indexes...>
1844  operator/(const RhsOperation &Rhs) const {
1845  return NewLHOp<RhsOperation, std::divides, Indexes...>(m_Vector, *this,
1846  Rhs);
1847  }
1848 
1849  template <typename T, typename = EnableIfScalarType<T>>
1850  NewLHOp<GetScalarOp<T>, std::modulus, Indexes...>
1851  operator%(const T &Rhs) const {
1852  return NewLHOp<GetScalarOp<T>, std::modulus, Indexes...>(
1853  m_Vector, *this, GetScalarOp<T>(Rhs));
1854  }
1855 
1856  template <typename RhsOperation,
1857  typename = EnableIfNoScalarType<RhsOperation>>
1858  NewLHOp<RhsOperation, std::modulus, Indexes...>
1859  operator%(const RhsOperation &Rhs) const {
1860  return NewLHOp<RhsOperation, std::modulus, Indexes...>(m_Vector, *this,
1861  Rhs);
1862  }
1863 
1864  template <typename T, typename = EnableIfScalarType<T>>
1865  NewLHOp<GetScalarOp<T>, std::bit_and, Indexes...>
1866  operator&(const T &Rhs) const {
1867  return NewLHOp<GetScalarOp<T>, std::bit_and, Indexes...>(
1868  m_Vector, *this, GetScalarOp<T>(Rhs));
1869  }
1870 
1871  template <typename RhsOperation,
1872  typename = EnableIfNoScalarType<RhsOperation>>
1873  NewLHOp<RhsOperation, std::bit_and, Indexes...>
1874  operator&(const RhsOperation &Rhs) const {
1875  return NewLHOp<RhsOperation, std::bit_and, Indexes...>(m_Vector, *this,
1876  Rhs);
1877  }
1878 
1879  template <typename T, typename = EnableIfScalarType<T>>
1880  NewLHOp<GetScalarOp<T>, std::bit_or, Indexes...>
1881  operator|(const T &Rhs) const {
1882  return NewLHOp<GetScalarOp<T>, std::bit_or, Indexes...>(
1883  m_Vector, *this, GetScalarOp<T>(Rhs));
1884  }
1885 
1886  template <typename RhsOperation,
1887  typename = EnableIfNoScalarType<RhsOperation>>
1888  NewLHOp<RhsOperation, std::bit_or, Indexes...>
1889  operator|(const RhsOperation &Rhs) const {
1890  return NewLHOp<RhsOperation, std::bit_or, Indexes...>(m_Vector, *this, Rhs);
1891  }
1892 
1893  template <typename T, typename = EnableIfScalarType<T>>
1894  NewLHOp<GetScalarOp<T>, std::bit_xor, Indexes...>
1895  operator^(const T &Rhs) const {
1896  return NewLHOp<GetScalarOp<T>, std::bit_xor, Indexes...>(
1897  m_Vector, *this, GetScalarOp<T>(Rhs));
1898  }
1899 
1900  template <typename RhsOperation,
1901  typename = EnableIfNoScalarType<RhsOperation>>
1902  NewLHOp<RhsOperation, std::bit_xor, Indexes...>
1903  operator^(const RhsOperation &Rhs) const {
1904  return NewLHOp<RhsOperation, std::bit_xor, Indexes...>(m_Vector, *this,
1905  Rhs);
1906  }
1907 
1908  template <typename T, typename = EnableIfScalarType<T>>
1909  NewLHOp<GetScalarOp<T>, RShift, Indexes...> operator>>(const T &Rhs) const {
1910  return NewLHOp<GetScalarOp<T>, RShift, Indexes...>(m_Vector, *this,
1911  GetScalarOp<T>(Rhs));
1912  }
1913 
1914  template <typename RhsOperation,
1915  typename = EnableIfNoScalarType<RhsOperation>>
1916  NewLHOp<RhsOperation, RShift, Indexes...>
1917  operator>>(const RhsOperation &Rhs) const {
1918  return NewLHOp<RhsOperation, RShift, Indexes...>(m_Vector, *this, Rhs);
1919  }
1920 
1921  template <typename T, typename = EnableIfScalarType<T>>
1922  NewLHOp<GetScalarOp<T>, LShift, Indexes...> operator<<(const T &Rhs) const {
1923  return NewLHOp<GetScalarOp<T>, LShift, Indexes...>(m_Vector, *this,
1924  GetScalarOp<T>(Rhs));
1925  }
1926 
1927  template <typename RhsOperation,
1928  typename = EnableIfNoScalarType<RhsOperation>>
1929  NewLHOp<RhsOperation, LShift, Indexes...>
1930  operator<<(const RhsOperation &Rhs) const {
1931  return NewLHOp<RhsOperation, LShift, Indexes...>(m_Vector, *this, Rhs);
1932  }
1933 
1934  template <
1935  typename T1, typename T2, typename T3, template <typename> class T4,
1936  int... T5,
1937  typename = typename std::enable_if_t<sizeof...(T5) == getNumElements()>>
1938  SwizzleOp &operator=(const SwizzleOp<T1, T2, T3, T4, T5...> &Rhs) {
1939  std::array<int, getNumElements()> Idxs{Indexes...};
1940  for (size_t I = 0; I < Idxs.size(); ++I) {
1941  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1942  }
1943  return *this;
1944  }
1945 
1946  template <
1947  typename T1, typename T2, typename T3, template <typename> class T4,
1948  int... T5,
1949  typename = typename std::enable_if_t<sizeof...(T5) == getNumElements()>>
1950  SwizzleOp &operator=(SwizzleOp<T1, T2, T3, T4, T5...> &&Rhs) {
1951  std::array<int, getNumElements()> Idxs{Indexes...};
1952  for (size_t I = 0; I < Idxs.size(); ++I) {
1953  m_Vector->setValue(Idxs[I], Rhs.getValue(I));
1954  }
1955  return *this;
1956  }
1957 
1958  template <typename T, typename = EnableIfScalarType<T>>
1959  NewRelOp<GetScalarOp<T>, EqualTo, Indexes...> operator==(const T &Rhs) const {
1960  return NewRelOp<GetScalarOp<T>, EqualTo, Indexes...>(NULL, *this,
1961  GetScalarOp<T>(Rhs));
1962  }
1963 
1964  template <typename RhsOperation,
1965  typename = EnableIfNoScalarType<RhsOperation>>
1966  NewRelOp<RhsOperation, EqualTo, Indexes...>
1967  operator==(const RhsOperation &Rhs) const {
1968  return NewRelOp<RhsOperation, EqualTo, Indexes...>(NULL, *this, Rhs);
1969  }
1970 
1971  template <typename T, typename = EnableIfScalarType<T>>
1972  NewRelOp<GetScalarOp<T>, NotEqualTo, Indexes...>
1973  operator!=(const T &Rhs) const {
1974  return NewRelOp<GetScalarOp<T>, NotEqualTo, Indexes...>(
1975  NULL, *this, GetScalarOp<T>(Rhs));
1976  }
1977 
1978  template <typename RhsOperation,
1979  typename = EnableIfNoScalarType<RhsOperation>>
1980  NewRelOp<RhsOperation, NotEqualTo, Indexes...>
1981  operator!=(const RhsOperation &Rhs) const {
1982  return NewRelOp<RhsOperation, NotEqualTo, Indexes...>(NULL, *this, Rhs);
1983  }
1984 
1985  template <typename T, typename = EnableIfScalarType<T>>
1986  NewRelOp<GetScalarOp<T>, GreaterEqualTo, Indexes...>
1987  operator>=(const T &Rhs) const {
1988  return NewRelOp<GetScalarOp<T>, GreaterEqualTo, Indexes...>(
1989  NULL, *this, GetScalarOp<T>(Rhs));
1990  }
1991 
1992  template <typename RhsOperation,
1993  typename = EnableIfNoScalarType<RhsOperation>>
1994  NewRelOp<RhsOperation, GreaterEqualTo, Indexes...>
1995  operator>=(const RhsOperation &Rhs) const {
1996  return NewRelOp<RhsOperation, GreaterEqualTo, Indexes...>(NULL, *this, Rhs);
1997  }
1998 
1999  template <typename T, typename = EnableIfScalarType<T>>
2000  NewRelOp<GetScalarOp<T>, LessEqualTo, Indexes...>
2001  operator<=(const T &Rhs) const {
2002  return NewRelOp<GetScalarOp<T>, LessEqualTo, Indexes...>(
2003  NULL, *this, GetScalarOp<T>(Rhs));
2004  }
2005 
2006  template <typename RhsOperation,
2007  typename = EnableIfNoScalarType<RhsOperation>>
2008  NewRelOp<RhsOperation, LessEqualTo, Indexes...>
2009  operator<=(const RhsOperation &Rhs) const {
2010  return NewRelOp<RhsOperation, LessEqualTo, Indexes...>(NULL, *this, Rhs);
2011  }
2012 
2013  template <typename T, typename = EnableIfScalarType<T>>
2014  NewRelOp<GetScalarOp<T>, GreaterThan, Indexes...>
2015  operator>(const T &Rhs) const {
2016  return NewRelOp<GetScalarOp<T>, GreaterThan, Indexes...>(
2017  NULL, *this, GetScalarOp<T>(Rhs));
2018  }
2019 
2020  template <typename RhsOperation,
2021  typename = EnableIfNoScalarType<RhsOperation>>
2022  NewRelOp<RhsOperation, GreaterThan, Indexes...>
2023  operator>(const RhsOperation &Rhs) const {
2024  return NewRelOp<RhsOperation, GreaterThan, Indexes...>(NULL, *this, Rhs);
2025  }
2026 
2027  template <typename T, typename = EnableIfScalarType<T>>
2028  NewRelOp<GetScalarOp<T>, LessThan, Indexes...> operator<(const T &Rhs) const {
2029  return NewRelOp<GetScalarOp<T>, LessThan, Indexes...>(NULL, *this,
2030  GetScalarOp<T>(Rhs));
2031  }
2032 
2033  template <typename RhsOperation,
2034  typename = EnableIfNoScalarType<RhsOperation>>
2035  NewRelOp<RhsOperation, LessThan, Indexes...>
2036  operator<(const RhsOperation &Rhs) const {
2037  return NewRelOp<RhsOperation, LessThan, Indexes...>(NULL, *this, Rhs);
2038  }
2039 
2040  template <typename T, typename = EnableIfScalarType<T>>
2041  NewRelOp<GetScalarOp<T>, LogicalAnd, Indexes...>
2042  operator&&(const T &Rhs) const {
2043  return NewRelOp<GetScalarOp<T>, LogicalAnd, Indexes...>(
2044  NULL, *this, GetScalarOp<T>(Rhs));
2045  }
2046 
2047  template <typename RhsOperation,
2048  typename = EnableIfNoScalarType<RhsOperation>>
2049  NewRelOp<RhsOperation, LogicalAnd, Indexes...>
2050  operator&&(const RhsOperation &Rhs) const {
2051  return NewRelOp<RhsOperation, LogicalAnd, Indexes...>(NULL, *this, Rhs);
2052  }
2053 
2054  template <typename T, typename = EnableIfScalarType<T>>
2055  NewRelOp<GetScalarOp<T>, LogicalOr, Indexes...>
2056  operator||(const T &Rhs) const {
2057  return NewRelOp<GetScalarOp<T>, LogicalOr, Indexes...>(NULL, *this,
2058  GetScalarOp<T>(Rhs));
2059  }
2060 
2061  template <typename RhsOperation,
2062  typename = EnableIfNoScalarType<RhsOperation>>
2063  NewRelOp<RhsOperation, LogicalOr, Indexes...>
2064  operator||(const RhsOperation &Rhs) const {
2065  return NewRelOp<RhsOperation, LogicalOr, Indexes...>(NULL, *this, Rhs);
2066  }
2067 
2068  // Begin hi/lo, even/odd, xyzw, and rgba swizzles.
2069 private:
2070  // Indexer used in the swizzles.def.
2071  // Currently it is defined as a template struct. Replacing it with a constexpr
2072  // function would activate a bug in MSVC that is fixed only in v19.20.
2073  // Until then MSVC does not recognize such constexpr functions as const and
2074  // thus does not let using them in template parameters inside swizzle.def.
2075  template <int Index> struct Indexer {
2076  static constexpr int IDXs[sizeof...(Indexes)] = {Indexes...};
2077  static constexpr int value = IDXs[Index >= getNumElements() ? 0 : Index];
2078  };
2079 
2080 public:
2081 #ifdef __SYCL_ACCESS_RETURN
2082 #error "Undefine __SYCL_ACCESS_RETURN macro"
2083 #endif
2084 #define __SYCL_ACCESS_RETURN m_Vector
2085 #include "swizzles.def"
2086 #undef __SYCL_ACCESS_RETURN
2087  // End of hi/lo, even/odd, xyzw, and rgba swizzles.
2088 
2089  // Leave store() interface to automatic conversion to vec<>.
2090  // Load to vec_t and then assign to swizzle.
2091  template <access::address_space Space, access::decorated DecorateAddress>
2092  void load(size_t offset, multi_ptr<DataT, Space, DecorateAddress> ptr) {
2093  vec_t Tmp;
2094  Tmp.template load(offset, ptr);
2095  *this = Tmp;
2096  }
2097 
2098  template <typename convertT, rounding_mode roundingMode>
2099  vec<convertT, sizeof...(Indexes)> convert() const {
2100  // First materialize the swizzle to vec_t and then apply convert() to it.
2101  vec_t Tmp = *this;
2102  return Tmp.template convert<convertT, roundingMode>();
2103  }
2104 
2105  template <typename asT> asT as() const {
2106  // First materialize the swizzle to vec_t and then apply as() to it.
2107  vec_t Tmp = *this;
2108  static_assert((sizeof(Tmp) == sizeof(asT)),
2109  "The new SYCL vec type must have the same storage size in "
2110  "bytes as this SYCL swizzled vec");
2111  static_assert(
2112  detail::is_contained<asT, detail::gtl::vector_basic_list>::value ||
2113  detail::is_contained<asT, detail::gtl::vector_bool_list>::value,
2114  "asT must be SYCL vec of a different element type and "
2115  "number of elements specified by asT");
2116  return Tmp.template as<asT>();
2117  }
2118 
2119 private:
2120  SwizzleOp(const SwizzleOp &Rhs)
2121  : m_Vector(Rhs.m_Vector), m_LeftOperation(Rhs.m_LeftOperation),
2122  m_RightOperation(Rhs.m_RightOperation) {}
2123 
2124  SwizzleOp(VecT *Vector, OperationLeftT LeftOperation,
2125  OperationRightT RightOperation)
2126  : m_Vector(Vector), m_LeftOperation(LeftOperation),
2127  m_RightOperation(RightOperation) {}
2128 
2129  SwizzleOp(VecT *Vector) : m_Vector(Vector) {}
2130 
2131  SwizzleOp(SwizzleOp &&Rhs)
2132  : m_Vector(Rhs.m_Vector), m_LeftOperation(std::move(Rhs.m_LeftOperation)),
2133  m_RightOperation(std::move(Rhs.m_RightOperation)) {}
2134 
2135  // Either performing CurrentOperation on results of left and right operands
2136  // or reading values from actual vector. Perform implicit type conversion when
2137  // the number of elements == 1
2138 
2139  template <int IdxNum = getNumElements()>
2140  CommonDataT getValue(EnableIfOneIndex<IdxNum, size_t> Index) const {
2141  if (std::is_same_v<OperationCurrentT<DataT>, GetOp<DataT>>) {
2142  std::array<int, getNumElements()> Idxs{Indexes...};
2143  return m_Vector->getValue(Idxs[Index]);
2144  }
2145  auto Op = OperationCurrentT<vec_data_t<CommonDataT>>();
2146  return vec_data<CommonDataT>::get(
2147  Op(vec_data<CommonDataT>::get(m_LeftOperation.getValue(Index)),
2148  vec_data<CommonDataT>::get(m_RightOperation.getValue(Index))));
2149  }
2150 
2151  template <int IdxNum = getNumElements()>
2152  DataT getValue(EnableIfMultipleIndexes<IdxNum, size_t> Index) const {
2153  if (std::is_same_v<OperationCurrentT<DataT>, GetOp<DataT>>) {
2154  std::array<int, getNumElements()> Idxs{Indexes...};
2155  return m_Vector->getValue(Idxs[Index]);
2156  }
2157  auto Op = OperationCurrentT<vec_data_t<DataT>>();
2158  return vec_data<DataT>::get(
2159  Op(vec_data<DataT>::get(m_LeftOperation.getValue(Index)),
2160  vec_data<DataT>::get(m_RightOperation.getValue(Index))));
2161  }
2162 
2163  template <template <typename> class Operation, typename RhsOperation>
2164  void operatorHelper(const RhsOperation &Rhs) {
2165  Operation<vec_data_t<DataT>> Op;
2166  std::array<int, getNumElements()> Idxs{Indexes...};
2167  for (size_t I = 0; I < Idxs.size(); ++I) {
2168  DataT Res = vec_data<DataT>::get(
2169  Op(vec_data<DataT>::get(m_Vector->getValue(Idxs[I])),
2170  vec_data<DataT>::get(Rhs.getValue(I))));
2171  m_Vector->setValue(Idxs[I], Res);
2172  }
2173  }
2174 
2175  // fields
2176  VecT *m_Vector;
2177 
2178  OperationLeftT m_LeftOperation;
2179  OperationRightT m_RightOperation;
2180 
2181  // friends
2182  template <typename T1, int T2> friend class sycl::vec;
2183 
2184  template <typename T1, typename T2, typename T3, template <typename> class T4,
2185  int... T5>
2186  friend class SwizzleOp;
2187 };
2189 } // namespace detail
2190 } // namespace _V1
2191 } // namespace sycl
DataT operator()(DataT, DataT)
Definition: vector.hpp:182
DataT getValue(size_t) const
Definition: vector.hpp:181
class sycl::vec ///////////////////////// Provides a cross-patform vector class template that works e...
std::conditional_t< sizeof(_IN)==1, T8, std::conditional_t< sizeof(_IN)==2, T16, std::conditional_t< sizeof(_IN)==4, T32, T64 > >> select_apply_cl_t
Definition: vector.hpp:95
typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_char), opencl::cl_char, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_short), opencl::cl_short, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_int), opencl::cl_int, typename std::conditional_t< sizeof(DataT)==sizeof(opencl::cl_long), opencl::cl_long, bool > >> > rel_t
Definition: vector.hpp:174
constexpr bool isValidVectorSize(int N)
Definition: vector.hpp:193
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
__attribute__((destructor(110))) static void syclUnload()
constexpr size_t MaxVecAlignment
std::int32_t cl_int
Definition: aliases.hpp:134
std::int8_t cl_char
Definition: aliases.hpp:130
std::int16_t cl_short
Definition: aliases.hpp:132
std::int64_t cl_long
Definition: aliases.hpp:136
unsigned char byte
Definition: image.hpp:107
typename detail::vec_helper< T >::RetType vec_data_t
Definition: vector.hpp:354
detail::vec_helper< T > vec_data
Definition: vector.hpp:351
Definition: access.hpp:18
std::array< T,(N==3) ? 4 :N > DataType
Definition: vector.hpp:218
typename VecStorageImpl< select_apply_cl_t< bool, std::int8_t, std::int16_t, std::int32_t, std::int64_t >, N >::DataType DataType
Definition: vector.hpp:236
sycl::ext::oneapi::detail::Bfloat16StorageT DataType
Definition: vector.hpp:332
sycl::ext::oneapi::detail::Bfloat16StorageT VectorDataType
Definition: vector.hpp:334
static constexpr RetType set(bool value)
Definition: vector.hpp:106
select_apply_cl_t< bool, std::int8_t, std::int16_t, std::int32_t, std::int64_t > RetType
Definition: vector.hpp:104
static constexpr RetType get(bool value)
Definition: vector.hpp:105
static constexpr RetType get(std::byte value)
Definition: vector.hpp:147
static constexpr RetType set(std::byte value)
Definition: vector.hpp:148
static constexpr std::byte get(std::uint8_t value)
Definition: vector.hpp:149
static constexpr std::byte set(std::uint8_t value)
Definition: vector.hpp:152
static constexpr RetType get(BFloat16StorageT value)
Definition: vector.hpp:112
static constexpr BFloat16StorageT set(RetType value)
Definition: vector.hpp:130
static constexpr RetType get(RetType value)
Definition: vector.hpp:128
sycl::ext::oneapi::detail::Bfloat16StorageT BFloat16StorageT
Definition: vector.hpp:111
static constexpr RetType set(T value)
Definition: vector.hpp:100
static constexpr RetType get(T value)
Definition: vector.hpp:99
static constexpr int s4
Definition: vector.hpp:74
static constexpr int s5
Definition: vector.hpp:75
static constexpr int s6
Definition: vector.hpp:76
static constexpr int s1
Definition: vector.hpp:71
static constexpr int x
Definition: vector.hpp:62
static constexpr int s2
Definition: vector.hpp:72
static constexpr int sB
Definition: vector.hpp:81
static constexpr int sC
Definition: vector.hpp:82
static constexpr int sE
Definition: vector.hpp:84
static constexpr int a
Definition: vector.hpp:69
static constexpr int w
Definition: vector.hpp:65
static constexpr int z
Definition: vector.hpp:64
static constexpr int s9
Definition: vector.hpp:79
static constexpr int sA
Definition: vector.hpp:80
static constexpr int sF
Definition: vector.hpp:85
static constexpr int s7
Definition: vector.hpp:77
static constexpr int s3
Definition: vector.hpp:73
static constexpr int r
Definition: vector.hpp:66
static constexpr int g
Definition: vector.hpp:67
static constexpr int sD
Definition: vector.hpp:83
static constexpr int y
Definition: vector.hpp:63
static constexpr int s0
Definition: vector.hpp:70
static constexpr int s8
Definition: vector.hpp:78
static constexpr int b
Definition: vector.hpp:68
#define __SYCL_DEFINE_HALF_VECSTORAGE(Num)
Definition: vector.hpp:317
#define __SYCL_DEFINE_BF16_VECSTORAGE(Num)
Definition: vector.hpp:337
Implementation of vec::convert.