DPC++ Runtime
Runtime libraries for oneAPI DPC++
simd.hpp
Go to the documentation of this file.
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 #define ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD // Defined unconditionally here, so the `#ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD` checks later in this file take the enabled branch.
10 // clang-format off
11 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
12 // Stub implementation of the "Data-Parallel Types" section of the
13 // "C++ Extensions for Parallelism Version 2":
14 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/n4808.pdf.
15 //
16 // This is mostly a copy-paste from
17 // https://github.com/intel/llvm/blob/sycl/libcxx/include/experimental/simd
18 // Changes are marked with "SYCL/invoke_simd".
19 // Primary usage for now is to implement the invoke_simd spec.
20 //===----------------------------------------------------------------------===//
21 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
22 
23 #pragma once // Added for ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
24 // Removed for ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD {
25 //#ifndef _LIBCPP_EXPERIMENTAL_SIMD
26 //#define _LIBCPP_EXPERIMENTAL_SIMD
27 // } Removed for ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
28 
29 /*
30  experimental/simd synopsis
31 
32 namespace std::experimental {
33 
34 inline namespace parallelism_v2 {
35 
36 namespace simd_abi {
37 
38 struct scalar {};
39 template <int N> struct fixed_size {};
40 template <typename T> inline constexpr int max_fixed_size = implementation-defined;
41 template <typename T> using compatible = implementation-defined;
42 template <typename T> using native = implementation-defined;
43 
44 } // simd_abi
45 
46 struct element_aligned_tag {};
47 struct vector_aligned_tag {};
48 template <size_t> struct overaligned_tag {};
49 inline constexpr element_aligned_tag element_aligned{};
50 inline constexpr vector_aligned_tag vector_aligned{};
51 template <size_t N> inline constexpr overaligned_tag<N> overaligned{};
52 
53 // traits [simd.traits]
54 template <class T> struct is_abi_tag;
55 template <class T> inline constexpr bool is_abi_tag_v = is_abi_tag<T>::value;
56 
57 template <class T> struct is_simd;
58 template <class T> inline constexpr bool is_simd_v = is_simd<T>::value;
59 
60 template <class T> struct is_simd_mask;
61 template <class T> inline constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
62 
63 template <class T> struct is_simd_flag_type;
64 template <class T> inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;
65 
66 template <class T, size_t N> struct abi_for_size { using type = see below; };
67 template <class T, size_t N> using abi_for_size_t = typename abi_for_size<T, N>::type;
68 
69 template <class T, class Abi = simd_abi::compatible<T>> struct simd_size;
70 template <class T, class Abi = simd_abi::compatible<T>>
71 inline constexpr size_t simd_size_v = simd_size<T, Abi>::value;
72 
73 template <class T, class U = typename T::value_type> struct memory_alignment;
74 template <class T, class U = typename T::value_type>
75 inline constexpr size_t memory_alignment_v = memory_alignment<T, U>::value;
76 
77 // class template simd [simd.class]
78 template <class T, class Abi = simd_abi::compatible<T>> class simd;
79 template <class T> using native_simd = simd<T, simd_abi::native<T>>;
80 template <class T, int N> using fixed_size_simd = simd<T, simd_abi::fixed_size<N>>;
81 
82 // class template simd_mask [simd.mask.class]
83 template <class T, class Abi = simd_abi::compatible<T>> class simd_mask;
84 template <class T> using native_simd_mask = simd_mask<T, simd_abi::native<T>>;
85 template <class T, int N> using fixed_size_simd_mask = simd_mask<T, simd_abi::fixed_size<N>>;
86 
87 // casts [simd.casts]
88 template <class T, class U, class Abi> see below simd_cast(const simd<U, Abi>&);
89 template <class T, class U, class Abi> see below static_simd_cast(const simd<U, Abi>&);
90 
91 template <class T, class Abi>
92 fixed_size_simd<T, simd_size_v<T, Abi>> to_fixed_size(const simd<T, Abi>&) noexcept;
93 template <class T, class Abi>
94 fixed_size_simd_mask<T, simd_size_v<T, Abi>> to_fixed_size(const simd_mask<T, Abi>&) noexcept;
95 template <class T, size_t N> native_simd<T> to_native(const fixed_size_simd<T, N>&) noexcept;
96 template <class T, size_t N>
97 native_simd_mask<T> to_native(const fixed_size_simd_mask<T, N>&) noexcept;
98 template <class T, size_t N> simd<T> to_compatible(const fixed_size_simd<T, N>&) noexcept;
99 template <class T, size_t N> simd_mask<T> to_compatible(const fixed_size_simd_mask<T, N>&) noexcept;
100 
101 template <size_t... Sizes, class T, class Abi>
102 tuple<simd<T, abi_for_size_t<T, Sizes>>...> split(const simd<T, Abi>&);
103 template <size_t... Sizes, class T, class Abi>
104 tuple<simd_mask<T, abi_for_size_t<T, Sizes>>...> split(const simd_mask<T, Abi>&);
105 template <class V, class Abi>
106 array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(
107 const simd<typename V::value_type, Abi>&);
108 template <class V, class Abi>
109 array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(
110 const simd_mask<typename V::value_type, Abi>&);
111 
112 template <class T, class... Abis>
113 simd<T, abi_for_size_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&...);
114 template <class T, class... Abis>
115 simd_mask<T, abi_for_size_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&...);
116 
117 // reductions [simd.mask.reductions]
118 template <class T, class Abi> bool all_of(const simd_mask<T, Abi>&) noexcept;
119 template <class T, class Abi> bool any_of(const simd_mask<T, Abi>&) noexcept;
120 template <class T, class Abi> bool none_of(const simd_mask<T, Abi>&) noexcept;
121 template <class T, class Abi> bool some_of(const simd_mask<T, Abi>&) noexcept;
122 template <class T, class Abi> int popcount(const simd_mask<T, Abi>&) noexcept;
123 template <class T, class Abi> int find_first_set(const simd_mask<T, Abi>&);
124 template <class T, class Abi> int find_last_set(const simd_mask<T, Abi>&);
125 
126 bool all_of(see below) noexcept;
127 bool any_of(see below) noexcept;
128 bool none_of(see below) noexcept;
129 bool some_of(see below) noexcept;
130 int popcount(see below) noexcept;
131 int find_first_set(see below) noexcept;
132 int find_last_set(see below) noexcept;
133 
134 // masked assignment [simd.whereexpr]
135 template <class M, class T> class const_where_expression;
136 template <class M, class T> class where_expression;
137 
138 // masked assignment [simd.mask.where]
139 template <class T> struct nodeduce { using type = T; }; // exposition only
140 
141 template <class T> using nodeduce_t = typename nodeduce<T>::type; // exposition only
142 
143 template <class T, class Abi>
144 where_expression<simd_mask<T, Abi>, simd<T, Abi>>
145 where(const typename simd<T, Abi>::mask_type&, simd<T, Abi>&) noexcept;
146 
147 template <class T, class Abi>
148 const_where_expression<simd_mask<T, Abi>, const simd<T, Abi>>
149 where(const typename simd<T, Abi>::mask_type&, const simd<T, Abi>&) noexcept;
150 
151 template <class T, class Abi>
152 where_expression<simd_mask<T, Abi>, simd_mask<T, Abi>>
153 where(const nodeduce_t<simd_mask<T, Abi>>&, simd_mask<T, Abi>&) noexcept;
154 
155 template <class T, class Abi>
156 const_where_expression<simd_mask<T, Abi>, const simd_mask<T, Abi>>
157 where(const nodeduce_t<simd_mask<T, Abi>>&, const simd_mask<T, Abi>&) noexcept;
158 
159 template <class T> where_expression<bool, T> where(see below k, T& d) noexcept;
160 
161 template <class T>
162 const_where_expression<bool, const T> where(see below k, const T& d) noexcept;
163 
164 // reductions [simd.reductions]
165 template <class T, class Abi, class BinaryOperation = std::plus<>>
166 T reduce(const simd<T, Abi>&, BinaryOperation = BinaryOperation());
167 
168 template <class M, class V, class BinaryOperation>
169 typename V::value_type reduce(const const_where_expression<M, V>& x,
170 typename V::value_type neutral_element, BinaryOperation binary_op);
171 
172 template <class M, class V>
173 typename V::value_type reduce(const const_where_expression<M, V>& x, plus<> binary_op = plus<>());
174 
175 template <class M, class V>
176 typename V::value_type reduce(const const_where_expression<M, V>& x, multiplies<> binary_op);
177 
178 template <class M, class V>
179 typename V::value_type reduce(const const_where_expression<M, V>& x, bit_and<> binary_op);
180 
181 template <class M, class V>
182 typename V::value_type reduce(const const_where_expression<M, V>& x, bit_or<> binary_op);
183 
184 template <class M, class V>
185 typename V::value_type reduce(const const_where_expression<M, V>& x, bit_xor<> binary_op);
186 
187 template <class T, class Abi> T hmin(const simd<T, Abi>&);
188 template <class M, class V> typename V::value_type hmin(const const_where_expression<M, V>&);
189 template <class T, class Abi> T hmax(const simd<T, Abi>&);
190 template <class M, class V> typename V::value_type hmax(const const_where_expression<M, V>&);
191 
192 // algorithms [simd.alg]
193 template <class T, class Abi> simd<T, Abi> min(const simd<T, Abi>&, const simd<T, Abi>&) noexcept;
194 
195 template <class T, class Abi> simd<T, Abi> max(const simd<T, Abi>&, const simd<T, Abi>&) noexcept;
196 
197 template <class T, class Abi>
198 std::pair<simd<T, Abi>, simd<T, Abi>> minmax(const simd<T, Abi>&, const simd<T, Abi>&) noexcept;
199 
200 template <class T, class Abi>
201 simd<T, Abi> clamp(const simd<T, Abi>& v, const simd<T, Abi>& lo, const simd<T, Abi>& hi);
202 
203 // [simd.whereexpr]
204 template <class M, class T>
205 class const_where_expression {
206  const M& mask; // exposition only
207  T& data; // exposition only
208 public:
209  const_where_expression(const const_where_expression&) = delete;
210  const_where_expression& operator=(const const_where_expression&) = delete;
211  remove_const_t<T> operator-() const &&;
212  template <class U, class Flags> void copy_to(U* mem, Flags f) const &&;
213 };
214 
215 template <class M, class T>
216 class where_expression : public const_where_expression<M, T> {
217 public:
218  where_expression(const where_expression&) = delete;
219  where_expression& operator=(const where_expression&) = delete;
220  template <class U> void operator=(U&& x);
221  template <class U> void operator+=(U&& x);
222  template <class U> void operator-=(U&& x);
223  template <class U> void operator*=(U&& x);
224  template <class U> void operator/=(U&& x);
225  template <class U> void operator%=(U&& x);
226  template <class U> void operator&=(U&& x);
227  template <class U> void operator|=(U&& x);
228  template <class U> void operator^=(U&& x);
229  template <class U> void operator<<=(U&& x);
230  template <class U> void operator>>=(U&& x);
231  void operator++();
232  void operator++(int);
233  void operator--();
234  void operator--(int);
235  template <class U, class Flags> void copy_from(const U* mem, Flags);
236 };
237 
238 // [simd.class]
239 template <class T, class Abi> class simd {
240 public:
241  using value_type = T;
242  using reference = see below;
243  using mask_type = simd_mask<T, Abi>;
244 
245  using abi_type = Abi;
246  static constexpr size_t size() noexcept;
247  simd() = default;
248 
249  // implicit type conversion constructor
250  template <class U> simd(const simd<U, simd_abi::fixed_size<size()>>&);
251 
252  // implicit broadcast constructor (see below for constraints)
253  template <class U> simd(U&& value);
254 
255  // generator constructor (see below for constraints)
256  template <class G> explicit simd(G&& gen);
257 
258  // load constructor
259  template <class U, class Flags> simd(const U* mem, Flags f);
260 
261  // loads [simd.load]
262  template <class U, class Flags> void copy_from(const U* mem, Flags f);
263 
264  // stores [simd.store]
265  template <class U, class Flags> void copy_to(U* mem, Flags f) const;
266 
267  // scalar access [simd.subscr]
268  reference operator[](size_t);
269  value_type operator[](size_t) const;
270 
271  // unary operators [simd.unary]
272  simd& operator++();
273  simd operator++(int);
274  simd& operator--();
275  simd operator--(int);
276  mask_type operator!() const;
277  simd operator~() const; // see below
278  simd operator+() const;
279  simd operator-() const;
280 
281  // binary operators [simd.binary]
282  friend simd operator+ (const simd&, const simd&);
283  friend simd operator- (const simd&, const simd&);
284  friend simd operator* (const simd&, const simd&);
285  friend simd operator/ (const simd&, const simd&);
286  friend simd operator% (const simd&, const simd&);
287  friend simd operator& (const simd&, const simd&);
288  friend simd operator| (const simd&, const simd&);
289  friend simd operator^ (const simd&, const simd&);
290  friend simd operator<<(const simd&, const simd&);
291  friend simd operator>>(const simd&, const simd&);
292  friend simd operator<<(const simd&, int);
293  friend simd operator>>(const simd&, int);
294 
295  // compound assignment [simd.cassign]
296  friend simd& operator+= (simd&, const simd&);
297  friend simd& operator-= (simd&, const simd&);
298  friend simd& operator*= (simd&, const simd&);
299  friend simd& operator/= (simd&, const simd&);
300  friend simd& operator%= (simd&, const simd&);
301 
302  friend simd& operator&= (simd&, const simd&);
303  friend simd& operator|= (simd&, const simd&);
304  friend simd& operator^= (simd&, const simd&);
305  friend simd& operator<<=(simd&, const simd&);
306  friend simd& operator>>=(simd&, const simd&);
307  friend simd& operator<<=(simd&, int);
308  friend simd& operator>>=(simd&, int);
309 
310  // compares [simd.comparison]
311  friend mask_type operator==(const simd&, const simd&);
312  friend mask_type operator!=(const simd&, const simd&);
313  friend mask_type operator>=(const simd&, const simd&);
314  friend mask_type operator<=(const simd&, const simd&);
315  friend mask_type operator> (const simd&, const simd&);
316  friend mask_type operator< (const simd&, const simd&);
317 };
318 
319 // [simd.math]
320 template <class Abi> using scharv = simd<signed char, Abi>; // exposition only
321 template <class Abi> using shortv = simd<short, Abi>; // exposition only
322 template <class Abi> using intv = simd<int, Abi>; // exposition only
323 template <class Abi> using longv = simd<long int, Abi>; // exposition only
324 template <class Abi> using llongv = simd<long long int, Abi>; // exposition only
325 template <class Abi> using floatv = simd<float, Abi>; // exposition only
326 template <class Abi> using doublev = simd<double, Abi>; // exposition only
327 template <class Abi> using ldoublev = simd<long double, Abi>; // exposition only
328 template <class T, class V> using samesize = fixed_size_simd<T, V::size()>; // exposition only
329 
330 template <class Abi> floatv<Abi> acos(floatv<Abi> x);
331 template <class Abi> doublev<Abi> acos(doublev<Abi> x);
332 template <class Abi> ldoublev<Abi> acos(ldoublev<Abi> x);
333 
334 template <class Abi> floatv<Abi> asin(floatv<Abi> x);
335 template <class Abi> doublev<Abi> asin(doublev<Abi> x);
336 template <class Abi> ldoublev<Abi> asin(ldoublev<Abi> x);
337 
338 template <class Abi> floatv<Abi> atan(floatv<Abi> x);
339 template <class Abi> doublev<Abi> atan(doublev<Abi> x);
340 template <class Abi> ldoublev<Abi> atan(ldoublev<Abi> x);
341 
342 template <class Abi> floatv<Abi> atan2(floatv<Abi> y, floatv<Abi> x);
343 template <class Abi> doublev<Abi> atan2(doublev<Abi> y, doublev<Abi> x);
344 template <class Abi> ldoublev<Abi> atan2(ldoublev<Abi> y, ldoublev<Abi> x);
345 
346 template <class Abi> floatv<Abi> cos(floatv<Abi> x);
347 template <class Abi> doublev<Abi> cos(doublev<Abi> x);
348 template <class Abi> ldoublev<Abi> cos(ldoublev<Abi> x);
349 
350 template <class Abi> floatv<Abi> sin(floatv<Abi> x);
351 template <class Abi> doublev<Abi> sin(doublev<Abi> x);
352 template <class Abi> ldoublev<Abi> sin(ldoublev<Abi> x);
353 
354 template <class Abi> floatv<Abi> tan(floatv<Abi> x);
355 template <class Abi> doublev<Abi> tan(doublev<Abi> x);
356 template <class Abi> ldoublev<Abi> tan(ldoublev<Abi> x);
357 
358 template <class Abi> floatv<Abi> acosh(floatv<Abi> x);
359 template <class Abi> doublev<Abi> acosh(doublev<Abi> x);
360 template <class Abi> ldoublev<Abi> acosh(ldoublev<Abi> x);
361 
362 template <class Abi> floatv<Abi> asinh(floatv<Abi> x);
363 template <class Abi> doublev<Abi> asinh(doublev<Abi> x);
364 template <class Abi> ldoublev<Abi> asinh(ldoublev<Abi> x);
365 
366 template <class Abi> floatv<Abi> atanh(floatv<Abi> x);
367 template <class Abi> doublev<Abi> atanh(doublev<Abi> x);
368 template <class Abi> ldoublev<Abi> atanh(ldoublev<Abi> x);
369 
370 template <class Abi> floatv<Abi> cosh(floatv<Abi> x);
371 template <class Abi> doublev<Abi> cosh(doublev<Abi> x);
372 template <class Abi> ldoublev<Abi> cosh(ldoublev<Abi> x);
373 
374 template <class Abi> floatv<Abi> sinh(floatv<Abi> x);
375 template <class Abi> doublev<Abi> sinh(doublev<Abi> x);
376 template <class Abi> ldoublev<Abi> sinh(ldoublev<Abi> x);
377 
378 template <class Abi> floatv<Abi> tanh(floatv<Abi> x);
379 template <class Abi> doublev<Abi> tanh(doublev<Abi> x);
380 template <class Abi> ldoublev<Abi> tanh(ldoublev<Abi> x);
381 
382 template <class Abi> floatv<Abi> exp(floatv<Abi> x);
383 template <class Abi> doublev<Abi> exp(doublev<Abi> x);
384 template <class Abi> ldoublev<Abi> exp(ldoublev<Abi> x);
385 
386 template <class Abi> floatv<Abi> exp2(floatv<Abi> x);
387 template <class Abi> doublev<Abi> exp2(doublev<Abi> x);
388 template <class Abi> ldoublev<Abi> exp2(ldoublev<Abi> x);
389 
390 template <class Abi> floatv<Abi> expm1(floatv<Abi> x);
391 template <class Abi> doublev<Abi> expm1(doublev<Abi> x);
392 template <class Abi> ldoublev<Abi> expm1(ldoublev<Abi> x);
393 
394 template <class Abi> floatv<Abi> frexp(floatv<Abi> value, samesize<int, floatv<Abi>>* exp);
395 template <class Abi> doublev<Abi> frexp(doublev<Abi> value, samesize<int, doublev<Abi>>* exp);
396 template <class Abi> ldoublev<Abi> frexp(ldoublev<Abi> value, samesize<int, ldoublev<Abi>>* exp);
397 
398 template <class Abi> samesize<int, floatv<Abi>> ilogb(floatv<Abi> x);
399 template <class Abi> samesize<int, doublev<Abi>> ilogb(doublev<Abi> x);
400 template <class Abi> samesize<int, ldoublev<Abi>> ilogb(ldoublev<Abi> x);
401 
402 template <class Abi> floatv<Abi> ldexp(floatv<Abi> x, samesize<int, floatv<Abi>> exp);
403 template <class Abi> doublev<Abi> ldexp(doublev<Abi> x, samesize<int, doublev<Abi>> exp);
404 template <class Abi> ldoublev<Abi> ldexp(ldoublev<Abi> x, samesize<int, ldoublev<Abi>> exp);
405 
406 template <class Abi> floatv<Abi> log(floatv<Abi> x);
407 template <class Abi> doublev<Abi> log(doublev<Abi> x);
408 template <class Abi> ldoublev<Abi> log(ldoublev<Abi> x);
409 
410 template <class Abi> floatv<Abi> log10(floatv<Abi> x);
411 template <class Abi> doublev<Abi> log10(doublev<Abi> x);
412 template <class Abi> ldoublev<Abi> log10(ldoublev<Abi> x);
413 
414 template <class Abi> floatv<Abi> log1p(floatv<Abi> x);
415 template <class Abi> doublev<Abi> log1p(doublev<Abi> x);
416 template <class Abi> ldoublev<Abi> log1p(ldoublev<Abi> x);
417 
418 template <class Abi> floatv<Abi> log2(floatv<Abi> x);
419 template <class Abi> doublev<Abi> log2(doublev<Abi> x);
420 template <class Abi> ldoublev<Abi> log2(ldoublev<Abi> x);
421 
422 template <class Abi> floatv<Abi> logb(floatv<Abi> x);
423 template <class Abi> doublev<Abi> logb(doublev<Abi> x);
424 template <class Abi> ldoublev<Abi> logb(ldoublev<Abi> x);
425 
426 template <class Abi> floatv<Abi> modf(floatv<Abi> value, floatv<Abi>* iptr);
427 template <class Abi> doublev<Abi> modf(doublev<Abi> value, doublev<Abi>* iptr);
428 template <class Abi> ldoublev<Abi> modf(ldoublev<Abi> value, ldoublev<Abi>* iptr);
429 
430 template <class Abi> floatv<Abi> scalbn(floatv<Abi> x, samesize<int, floatv<Abi>> n);
431 template <class Abi> doublev<Abi> scalbn(doublev<Abi> x, samesize<int, doublev<Abi>> n);
432 template <class Abi> ldoublev<Abi> scalbn(ldoublev<Abi> x, samesize<int, ldoublev<Abi>> n);
433 template <class Abi> floatv<Abi> scalbln(floatv<Abi> x, samesize<long int, floatv<Abi>> n);
434 template <class Abi> doublev<Abi> scalbln(doublev<Abi> x, samesize<long int, doublev<Abi>> n);
435 template <class Abi> ldoublev<Abi> scalbln(ldoublev<Abi> x, samesize<long int, ldoublev<Abi>> n);
436 
437 template <class Abi> floatv<Abi> cbrt(floatv<Abi> x);
438 template <class Abi> doublev<Abi> cbrt(doublev<Abi> x);
439 template <class Abi> ldoublev<Abi> cbrt(ldoublev<Abi> x);
440 
441 template <class Abi> scharv<Abi> abs(scharv<Abi> j);
442 template <class Abi> shortv<Abi> abs(shortv<Abi> j);
443 template <class Abi> intv<Abi> abs(intv<Abi> j);
444 template <class Abi> longv<Abi> abs(longv<Abi> j);
445 template <class Abi> llongv<Abi> abs(llongv<Abi> j);
446 template <class Abi> floatv<Abi> abs(floatv<Abi> j);
447 template <class Abi> doublev<Abi> abs(doublev<Abi> j);
448 template <class Abi> ldoublev<Abi> abs(ldoublev<Abi> j);
449 
450 template <class Abi> floatv<Abi> hypot(floatv<Abi> x, floatv<Abi> y);
451 template <class Abi> doublev<Abi> hypot(doublev<Abi> x, doublev<Abi> y);
452 template <class Abi> ldoublev<Abi> hypot(ldoublev<Abi> x, ldoublev<Abi> y);
453 template <class Abi> floatv<Abi> hypot(floatv<Abi> x, floatv<Abi> y, floatv<Abi> z);
454 template <class Abi> doublev<Abi> hypot(doublev<Abi> x, doublev<Abi> y, doublev<Abi> z);
455 template <class Abi> ldoublev<Abi> hypot(ldoublev<Abi> x, ldoublev<Abi> y, ldoublev<Abi> z);
456 
457 template <class Abi> floatv<Abi> pow(floatv<Abi> x, floatv<Abi> y);
458 template <class Abi> doublev<Abi> pow(doublev<Abi> x, doublev<Abi> y);
459 template <class Abi> ldoublev<Abi> pow(ldoublev<Abi> x, ldoublev<Abi> y);
460 
461 template <class Abi> floatv<Abi> sqrt(floatv<Abi> x);
462 template <class Abi> doublev<Abi> sqrt(doublev<Abi> x);
463 template <class Abi> ldoublev<Abi> sqrt(ldoublev<Abi> x);
464 
465 template <class Abi> floatv<Abi> erf(floatv<Abi> x);
466 template <class Abi> doublev<Abi> erf(doublev<Abi> x);
467 template <class Abi> ldoublev<Abi> erf(ldoublev<Abi> x);
468 template <class Abi> floatv<Abi> erfc(floatv<Abi> x);
469 template <class Abi> doublev<Abi> erfc(doublev<Abi> x);
470 template <class Abi> ldoublev<Abi> erfc(ldoublev<Abi> x);
471 
472 template <class Abi> floatv<Abi> lgamma(floatv<Abi> x);
473 template <class Abi> doublev<Abi> lgamma(doublev<Abi> x);
474 template <class Abi> ldoublev<Abi> lgamma(ldoublev<Abi> x);
475 
476 template <class Abi> floatv<Abi> tgamma(floatv<Abi> x);
477 template <class Abi> doublev<Abi> tgamma(doublev<Abi> x);
478 template <class Abi> ldoublev<Abi> tgamma(ldoublev<Abi> x);
479 
480 template <class Abi> floatv<Abi> ceil(floatv<Abi> x);
481 template <class Abi> doublev<Abi> ceil(doublev<Abi> x);
482 template <class Abi> ldoublev<Abi> ceil(ldoublev<Abi> x);
483 
484 template <class Abi> floatv<Abi> floor(floatv<Abi> x);
485 template <class Abi> doublev<Abi> floor(doublev<Abi> x);
486 template <class Abi> ldoublev<Abi> floor(ldoublev<Abi> x);
487 
488 template <class Abi> floatv<Abi> nearbyint(floatv<Abi> x);
489 template <class Abi> doublev<Abi> nearbyint(doublev<Abi> x);
490 template <class Abi> ldoublev<Abi> nearbyint(ldoublev<Abi> x);
491 
492 template <class Abi> floatv<Abi> rint(floatv<Abi> x);
493 template <class Abi> doublev<Abi> rint(doublev<Abi> x);
494 template <class Abi> ldoublev<Abi> rint(ldoublev<Abi> x);
495 
496 template <class Abi> samesize<long int, floatv<Abi>> lrint(floatv<Abi> x);
497 template <class Abi> samesize<long int, doublev<Abi>> lrint(doublev<Abi> x);
498 template <class Abi> samesize<long int, ldoublev<Abi>> lrint(ldoublev<Abi> x);
499 template <class Abi> samesize<long long int, floatv<Abi>> llrint(floatv<Abi> x);
500 template <class Abi> samesize<long long int, doublev<Abi>> llrint(doublev<Abi> x);
501 template <class Abi> samesize<long long int, ldoublev<Abi>> llrint(ldoublev<Abi> x);
502 
503 template <class Abi> floatv<Abi> round(floatv<Abi> x);
504 template <class Abi> doublev<Abi> round(doublev<Abi> x);
505 template <class Abi> ldoublev<Abi> round(ldoublev<Abi> x);
506 template <class Abi> samesize<long int, floatv<Abi>> lround(floatv<Abi> x);
507 template <class Abi> samesize<long int, doublev<Abi>> lround(doublev<Abi> x);
508 template <class Abi> samesize<long int, ldoublev<Abi>> lround(ldoublev<Abi> x);
509 template <class Abi> samesize<long long int, floatv<Abi>> llround(floatv<Abi> x);
510 template <class Abi> samesize<long long int, doublev<Abi>> llround(doublev<Abi> x);
511 template <class Abi> samesize<long long int, ldoublev<Abi>> llround(ldoublev<Abi> x);
512 
513 template <class Abi> floatv<Abi> trunc(floatv<Abi> x);
514 template <class Abi> doublev<Abi> trunc(doublev<Abi> x);
515 template <class Abi> ldoublev<Abi> trunc(ldoublev<Abi> x);
516 
517 template <class Abi> floatv<Abi> fmod(floatv<Abi> x, floatv<Abi> y);
518 template <class Abi> doublev<Abi> fmod(doublev<Abi> x, doublev<Abi> y);
519 template <class Abi> ldoublev<Abi> fmod(ldoublev<Abi> x, ldoublev<Abi> y);
520 
521 template <class Abi> floatv<Abi> remainder(floatv<Abi> x, floatv<Abi> y);
522 template <class Abi> doublev<Abi> remainder(doublev<Abi> x, doublev<Abi> y);
523 template <class Abi> ldoublev<Abi> remainder(ldoublev<Abi> x, ldoublev<Abi> y);
524 
525 template <class Abi> floatv<Abi> remquo(floatv<Abi> x, floatv<Abi> y, samesize<int, floatv<Abi>>* quo);
526 template <class Abi> doublev<Abi> remquo(doublev<Abi> x, doublev<Abi> y, samesize<int, doublev<Abi>>* quo);
527 template <class Abi> ldoublev<Abi> remquo(ldoublev<Abi> x, ldoublev<Abi> y, samesize<int, ldoublev<Abi>>* quo);
528 
529 template <class Abi> floatv<Abi> copysign(floatv<Abi> x, floatv<Abi> y);
530 template <class Abi> doublev<Abi> copysign(doublev<Abi> x, doublev<Abi> y);
531 template <class Abi> ldoublev<Abi> copysign(ldoublev<Abi> x, ldoublev<Abi> y);
532 
533 template <class Abi> doublev<Abi> nan(const char* tagp);
534 template <class Abi> floatv<Abi> nanf(const char* tagp);
535 template <class Abi> ldoublev<Abi> nanl(const char* tagp);
536 
537 template <class Abi> floatv<Abi> nextafter(floatv<Abi> x, floatv<Abi> y);
538 template <class Abi> doublev<Abi> nextafter(doublev<Abi> x, doublev<Abi> y);
539 template <class Abi> ldoublev<Abi> nextafter(ldoublev<Abi> x, ldoublev<Abi> y);
540 
541 template <class Abi> floatv<Abi> nexttoward(floatv<Abi> x, ldoublev<Abi> y);
542 template <class Abi> doublev<Abi> nexttoward(doublev<Abi> x, ldoublev<Abi> y);
543 template <class Abi> ldoublev<Abi> nexttoward(ldoublev<Abi> x, ldoublev<Abi> y);
544 
545 template <class Abi> floatv<Abi> fdim(floatv<Abi> x, floatv<Abi> y);
546 template <class Abi> doublev<Abi> fdim(doublev<Abi> x, doublev<Abi> y);
547 template <class Abi> ldoublev<Abi> fdim(ldoublev<Abi> x, ldoublev<Abi> y);
548 
549 template <class Abi> floatv<Abi> fmax(floatv<Abi> x, floatv<Abi> y);
550 template <class Abi> doublev<Abi> fmax(doublev<Abi> x, doublev<Abi> y);
551 template <class Abi> ldoublev<Abi> fmax(ldoublev<Abi> x, ldoublev<Abi> y);
552 
553 template <class Abi> floatv<Abi> fmin(floatv<Abi> x, floatv<Abi> y);
554 template <class Abi> doublev<Abi> fmin(doublev<Abi> x, doublev<Abi> y);
555 template <class Abi> ldoublev<Abi> fmin(ldoublev<Abi> x, ldoublev<Abi> y);
556 
557 template <class Abi> floatv<Abi> fma(floatv<Abi> x, floatv<Abi> y, floatv<Abi> z);
558 template <class Abi> doublev<Abi> fma(doublev<Abi> x, doublev<Abi> y, doublev<Abi> z);
559 template <class Abi> ldoublev<Abi> fma(ldoublev<Abi> x, ldoublev<Abi> y, ldoublev<Abi> z);
560 
561 template <class Abi> samesize<int, floatv<Abi>> fpclassify(floatv<Abi> x);
562 template <class Abi> samesize<int, doublev<Abi>> fpclassify(doublev<Abi> x);
563 template <class Abi> samesize<int, ldoublev<Abi>> fpclassify(ldoublev<Abi> x);
564 
565 template <class Abi> simd_mask<float, Abi> isfinite(floatv<Abi> x);
566 template <class Abi> simd_mask<double, Abi> isfinite(doublev<Abi> x);
567 template <class Abi> simd_mask<long double, Abi> isfinite(ldoublev<Abi> x);
568 
569 template <class Abi> simd_mask<float, Abi> isinf(floatv<Abi> x);
570 template <class Abi> simd_mask<double, Abi> isinf(doublev<Abi> x);
571 template <class Abi> simd_mask<long double, Abi> isinf(ldoublev<Abi> x);
572 
573 template <class Abi> simd_mask<float, Abi> isnan(floatv<Abi> x);
574 template <class Abi> simd_mask<double, Abi> isnan(doublev<Abi> x);
575 template <class Abi> simd_mask<long double, Abi> isnan(ldoublev<Abi> x);
576 
577 template <class Abi> simd_mask<float, Abi> isnormal(floatv<Abi> x);
578 template <class Abi> simd_mask<double, Abi> isnormal(doublev<Abi> x);
579 template <class Abi> simd_mask<long double, Abi> isnormal(ldoublev<Abi> x);
580 
581 template <class Abi> simd_mask<float, Abi> signbit(floatv<Abi> x);
582 template <class Abi> simd_mask<double, Abi> signbit(doublev<Abi> x);
583 template <class Abi> simd_mask<long double, Abi> signbit(ldoublev<Abi> x);
584 
585 template <class Abi> simd_mask<float, Abi> isgreater(floatv<Abi> x, floatv<Abi> y);
586 template <class Abi> simd_mask<double, Abi> isgreater(doublev<Abi> x, doublev<Abi> y);
587 template <class Abi> simd_mask<long double, Abi> isgreater(ldoublev<Abi> x, ldoublev<Abi> y);
588 
589 template <class Abi> simd_mask<float, Abi> isgreaterequal(floatv<Abi> x, floatv<Abi> y);
590 template <class Abi> simd_mask<double, Abi> isgreaterequal(doublev<Abi> x, doublev<Abi> y);
591 template <class Abi> simd_mask<long double, Abi> isgreaterequal(ldoublev<Abi> x, ldoublev<Abi> y);
592 
593 template <class Abi> simd_mask<float, Abi> isless(floatv<Abi> x, floatv<Abi> y);
594 template <class Abi> simd_mask<double, Abi> isless(doublev<Abi> x, doublev<Abi> y);
595 template <class Abi> simd_mask<long double, Abi> isless(ldoublev<Abi> x, ldoublev<Abi> y);
596 
597 template <class Abi> simd_mask<float, Abi> islessequal(floatv<Abi> x, floatv<Abi> y);
598 template <class Abi> simd_mask<double, Abi> islessequal(doublev<Abi> x, doublev<Abi> y);
599 template <class Abi> simd_mask<long double, Abi> islessequal(ldoublev<Abi> x, ldoublev<Abi> y);
600 
601 template <class Abi> simd_mask<float, Abi> islessgreater(floatv<Abi> x, floatv<Abi> y);
602 template <class Abi> simd_mask<double, Abi> islessgreater(doublev<Abi> x, doublev<Abi> y);
603 template <class Abi> simd_mask<long double, Abi> islessgreater(ldoublev<Abi> x, ldoublev<Abi> y);
604 
605 template <class Abi> simd_mask<float, Abi> isunordered(floatv<Abi> x, floatv<Abi> y);
606 template <class Abi> simd_mask<double, Abi> isunordered(doublev<Abi> x, doublev<Abi> y);
607 template <class Abi> simd_mask<long double, Abi> isunordered(ldoublev<Abi> x, ldoublev<Abi> y);
608 
609 template <class V> struct simd_div_t { V quot, rem; };
610 template <class Abi> simd_div_t<scharv<Abi>> div(scharv<Abi> numer, scharv<Abi> denom);
611 template <class Abi> simd_div_t<shortv<Abi>> div(shortv<Abi> numer, shortv<Abi> denom);
612 template <class Abi> simd_div_t<intv<Abi>> div(intv<Abi> numer, intv<Abi> denom);
613 template <class Abi> simd_div_t<longv<Abi>> div(longv<Abi> numer, longv<Abi> denom);
614 template <class Abi> simd_div_t<llongv<Abi>> div(llongv<Abi> numer, llongv<Abi> denom);
615 
616 // [simd.mask.class]
617 template <class T, class Abi>
618 class simd_mask {
619 public:
620  using value_type = bool;
621  using reference = see below;
622  using simd_type = simd<T, Abi>;
623  using abi_type = Abi;
624  static constexpr size_t size() noexcept;
625  simd_mask() = default;
626 
627  // broadcast constructor
628  explicit simd_mask(value_type) noexcept;
629 
630  // implicit type conversion constructor
631  template <class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>&) noexcept;
632 
633  // load constructor
634  template <class Flags> simd_mask(const value_type* mem, Flags);
635 
636  // loads [simd.mask.copy]
637  template <class Flags> void copy_from(const value_type* mem, Flags);
638  template <class Flags> void copy_to(value_type* mem, Flags) const;
639 
640  // scalar access [simd.mask.subscr]
641  reference operator[](size_t);
642  value_type operator[](size_t) const;
643 
644  // unary operators [simd.mask.unary]
645  simd_mask operator!() const noexcept;
646 
647  // simd_mask binary operators [simd.mask.binary]
648  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
649  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
650  friend simd_mask operator& (const simd_mask&, const simd_mask&) noexcept;
651  friend simd_mask operator| (const simd_mask&, const simd_mask&) noexcept;
652  friend simd_mask operator^ (const simd_mask&, const simd_mask&) noexcept;
653 
654  // simd_mask compound assignment [simd.mask.cassign]
655  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
656  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
657  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;
658 
659  // simd_mask compares [simd.mask.comparison]
660  friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept;
661  friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept;
662 };
663 
664 } // parallelism_v2
665 } // std::experimental
666 
667 */
668 
669 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
670 #define _LIBCPP_STD_VER 17
671 #define _LIBCPP_COMPILER_CLANG_BASED 1
672 #define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER 1
673 #define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD namespace std::experimental {
674 #define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD }
675 #define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI namespace std::experimental::simd_abi {
676 #define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI }
677 #define _LIBCPP_INLINE_VAR inline
678 #define _LIBCPP_PUSH_MACROS
679 #define _LIBCPP_POP_MACROS
680 #define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 512 // is not really used for now for sycl::ext::oneapi::experimental::invoke_simd
681 
682 #include <algorithm>
683 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
684 
685 
686 
687 #include <array>
688 #include <cstddef>
689 #ifndef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
690 #include <experimental/__config>
691 #endif // !ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
692 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
693 #include <cstdint>
694 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
695 #include <functional>
696 
697 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
698 # pragma GCC system_header
699 #endif
700 
702 #ifndef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
703 #include <__undef_macros>
704 #endif // !ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
705 
706 
708 
709 #if _LIBCPP_STD_VER >= 17
710 
// Selects how a simd / simd_mask object stores its elements. It is the first
// template argument of __simd_abi and picks the matching __simd_storage
// specialization.
enum class _StorageKind {
  _Scalar, // presumably a single element held directly -- confirm against the
           // scalar __simd_storage specialization
  _Array,  // elements held in a std::array
  _VecExt, // elements held in a compiler vector-extension type
};
716 
// ABI tag type: carries a storage kind and an element count purely as template
// arguments; it has no members.
template <_StorageKind __kind, int _Np>
struct __simd_abi {};
719 
// Primary template is empty; element storage is supplied by the partial
// specializations keyed on the __simd_abi storage kind.
template <class _Tp, class _Abi>
class __simd_storage {};
722 
723 template <class _Tp, int __num_element>
724 class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> {
725  std::array<_Tp, __num_element> __storage_;
726 
727  template <class, class>
728  friend struct simd;
729 
730  template <class, class>
731  friend struct simd_mask;
732 
733 public:
734  _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }
735  void __set(size_t __index, _Tp __val) noexcept {
736  __storage_[__index] = __val;
737  }
738 };
739 
740 template <class _Tp>
742  _Tp __storage_;
743 
744  template <class, class>
745  friend struct simd;
746 
747  template <class, class>
748  friend struct simd_mask;
749 
750 public:
751  _Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; }
752  void __set(size_t __index, _Tp __val) noexcept {
753  (&__storage_)[__index] = __val;
754  }
755 };
756 
757 #ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION
758 
// Returns the largest power of two that is <= __val (0 maps to 0). Works by
// clearing the lowest set bit until at most one bit remains.
constexpr size_t __floor_pow_of_2(size_t __val) {
  size_t __bits = __val;
  while ((__bits & (__bits - 1)) != 0) {
    __bits &= __bits - 1;
  }
  return __bits;
}
763 
764 constexpr size_t __ceil_pow_of_2(size_t __val) {
765  return __val == 1 ? 1 : __floor_pow_of_2(__val - 1) << 1;
766 }
767 
768 template <class _Tp, size_t __bytes>
770 #if !defined(_LIBCPP_COMPILER_CLANG_BASED)
771  typedef _Tp type __attribute__((vector_size(__ceil_pow_of_2(__bytes))));
772 #endif
773 };
774 
775 #if defined(_LIBCPP_COMPILER_CLANG_BASED)
// Specializes __vec_ext_traits for _NUM_ELEMENT elements of _TYPE, mapping it
// to a vector-extension type of sizeof(_TYPE) * _NUM_ELEMENT bytes.
#define _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, _NUM_ELEMENT)                        \
  template <>                                                                  \
  struct __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> {               \
    using type =                                                               \
        _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT)));      \
  }
782 
// Emits _LIBCPP_SPECIALIZE_VEC_EXT for every element count from 1 to 32. The
// last expansion has no trailing semicolon; the use site supplies it.
#define _LIBCPP_SPECIALIZE_VEC_EXT_32(_TYPE)                                   \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 1);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 2);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 3);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 4);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 5);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 6);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 7);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 8);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 9);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 10);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 11);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 12);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 13);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 14);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 15);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 16);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 17);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 18);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 19);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 20);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 21);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 22);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 23);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 24);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 25);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 26);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 27);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 28);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 29);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 30);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 31);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 32)
816 
830 _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long long);
834 
835 #undef _LIBCPP_SPECIALIZE_VEC_EXT_32
836 #undef _LIBCPP_SPECIALIZE_VEC_EXT
837 #endif
838 
839 template <class _Tp, int __num_element>
840 class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> {
841  using _StorageType =
842  typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type;
843 
844  _StorageType __storage_;
845 
846  template <class, class>
847  friend struct simd;
848 
849  template <class, class>
850  friend struct simd_mask;
851 
852 public:
853  _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }
854  void __set(size_t __index, _Tp __val) noexcept {
855  __storage_[__index] = __val;
856  }
857 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
858  const _StorageType& data() const noexcept { return __storage_; }
859 #endif
860 };
861 
862 #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION
863 
864 template <class _Vp, class _Tp, class _Abi>
866  static_assert(std::is_same_v<_Vp, _Tp>, "");
867 
868  template <class, class>
869  friend struct simd;
870 
871  template <class, class>
872  friend struct simd_mask;
873 
875  size_t __index_;
876 
877  __simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index)
878  : __ptr_(__ptr), __index_(__index) {}
879 
880  __simd_reference(const __simd_reference&) = default;
881 
882 public:
883  __simd_reference() = delete;
885 
886  operator _Vp() const { return __ptr_->__get(__index_); }
887 
888  __simd_reference operator=(_Vp __value) && {
889  __ptr_->__set(__index_, __value);
890  return *this;
891  }
892 
894  return std::move(*this) = __ptr_->__get(__index_) + 1;
895  }
896 
897  _Vp operator++(int) && {
898  auto __val = __ptr_->__get(__index_);
899  __ptr_->__set(__index_, __val + 1);
900  return __val;
901  }
902 
904  return std::move(*this) = __ptr_->__get(__index_) - 1;
905  }
906 
907  _Vp operator--(int) && {
908  auto __val = __ptr_->__get(__index_);
909  __ptr_->__set(__index_, __val - 1);
910  return __val;
911  }
912 
913  __simd_reference operator+=(_Vp __value) && {
914  return std::move(*this) = __ptr_->__get(__index_) + __value;
915  }
916 
917  __simd_reference operator-=(_Vp __value) && {
918  return std::move(*this) = __ptr_->__get(__index_) - __value;
919  }
920 
921  __simd_reference operator*=(_Vp __value) && {
922  return std::move(*this) = __ptr_->__get(__index_) * __value;
923  }
924 
925  __simd_reference operator/=(_Vp __value) && {
926  return std::move(*this) = __ptr_->__get(__index_) / __value;
927  }
928 
929  __simd_reference operator%=(_Vp __value) && {
930  return std::move(*this) = __ptr_->__get(__index_) % __value;
931  }
932 
933  __simd_reference operator>>=(_Vp __value) && {
934  return std::move(*this) = __ptr_->__get(__index_) >> __value;
935  }
936 
937  __simd_reference operator<<=(_Vp __value) && {
938  return std::move(*this) = __ptr_->__get(__index_) << __value;
939  }
940 
941  __simd_reference operator&=(_Vp __value) && {
942  return std::move(*this) = __ptr_->__get(__index_) & __value;
943  }
944 
945  __simd_reference operator|=(_Vp __value) && {
946  return std::move(*this) = __ptr_->__get(__index_) | __value;
947  }
948 
949  __simd_reference operator^=(_Vp __value) && {
950  return std::move(*this) = __ptr_->__get(__index_) ^ __value;
951  }
952 };
953 
954 template <class _To, class _From>
955 constexpr decltype(_To{std::declval<_From>()}, true)
957  return true;
958 }
959 
960 template <class _To>
962  return false;
963 }
964 
965 template <class _From, class _To>
966 constexpr std::enable_if_t<std::is_arithmetic_v<_To> &&
967  std::is_arithmetic_v<_From>,
968  bool>
970  return __is_non_narrowing_convertible_impl<_To>(_From{});
971 }
972 
973 template <class _From, class _To>
974 constexpr std::enable_if_t<!(std::is_arithmetic_v<_To> &&
975  std::is_arithmetic_v<_From>),
976  bool>
978  return false;
979 }
980 
// Sum of an empty argument list: a value-initialized _Tp.
template <class _Tp>
constexpr _Tp __variadic_sum() {
  return _Tp{};
}

// Converts every argument to _Tp and adds them up, peeling one argument off
// per recursion step. Each partial sum is converted back to _Tp on return, so
// with _Tp == bool the result is true if any argument converts to true.
template <class _Tp, class _Up, class... _Args>
constexpr _Tp __variadic_sum(_Up __first, _Args... __rest) {
  const _Tp __head = static_cast<_Tp>(__first);
  const _Tp __tail = __variadic_sum<_Tp>(__rest...);
  return __head + __tail;
}
990 
// Identity wrapper. Spelling a parameter type as
// `typename __nodeduce<X>::type` places X in a non-deduced context, so that
// parameter does not take part in template argument deduction.
template <class _Tp>
struct __nodeduce {
  using type = _Tp;
};
995 
// True for arithmetic types other than bool that carry no const or volatile
// qualifier.
template <class _Tp>
constexpr bool __vectorizable() {
  if (!std::is_arithmetic_v<_Tp>) {
    return false;
  }
  if (std::is_const_v<_Tp> || std::is_volatile_v<_Tp>) {
    return false;
  }
  return !std::is_same_v<_Tp, bool>;
}
1001 
1004 
1006 
1007 template <int _Np>
1009 
// Reported maximum element count for simd_abi::fixed_size; this stub uses 32
// regardless of _Tp.
template <class _Tp>
inline constexpr size_t max_fixed_size = 32;
1012 
// Default ABI for _Tp: a fixed_size ABI with 16 / sizeof(_Tp) elements, i.e.
// as many elements as fit in 16 bytes.
template <class _Tp>
using compatible = fixed_size<16 / sizeof(_Tp)>;
1015 
1016 #ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION
1017 template <class _Tp>
1019  _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
1020 #else
1021 template <class _Tp>
1022 using native =
1023  fixed_size<_Tp, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
1024 #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION
1025 
1028 
1029 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
1030 class simd;
1031 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
1032 class simd_mask;
1033 
1036 template <size_t>
1040 template <size_t _Np>
1042 
1043 // traits [simd.traits]
1044 template <class _Tp>
1045 struct is_abi_tag : std::integral_constant<bool, false> {};
1046 
1047 template <_StorageKind __kind, int _Np>
1048 struct is_abi_tag<__simd_abi<__kind, _Np>>
1049  : std::integral_constant<bool, true> {};
1050 
1051 template <class _Tp>
1052 struct is_simd : std::integral_constant<bool, false> {};
1053 
1054 template <class _Tp, class _Abi>
1055 struct is_simd<simd<_Tp, _Abi>> : std::integral_constant<bool, true> {};
1056 
1057 template <class _Tp>
1058 struct is_simd_mask : std::integral_constant<bool, false> {};
1059 
1060 template <class _Tp, class _Abi>
1061 struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::integral_constant<bool, true> {
1062 };
1063 
// Primary template: a type is not a load/store flag type unless one of the
// specializations says otherwise.
template <class _Tp>
struct is_simd_flag_type : std::false_type {};
1066 
1067 template <>
1069  : std::integral_constant<bool, true> {};
1070 
1071 template <>
1073  : std::integral_constant<bool, true> {};
1074 
1075 template <size_t _Align>
1077  : std::integral_constant<bool, true> {};
1078 
1079 template <class _Tp>
1080 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
1081 template <class _Tp>
1082 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
1083 template <class _Tp>
1085 template <class _Tp>
1087 template <class _Tp, size_t _Np>
1089  using type = simd_abi::fixed_size<_Np>;
1090 };
1091 template <class _Tp, size_t _Np>
1093 
1094 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
1095 struct simd_size;
1096 
// simd_size for any __simd_abi: the element count is the ABI's _Np. The
// static_assert rejects non-arithmetic element types and (optionally const)
// bool.
template <class _Tp, _StorageKind __kind, int _Np>
struct simd_size<_Tp, __simd_abi<__kind, _Np>>
    : std::integral_constant<size_t, _Np> {
  static_assert(
      std::is_arithmetic_v<_Tp> &&
          !std::is_same_v<std::remove_const_t<_Tp>, bool>,
      "Element type should be vectorizable");
};
1105 
1106 // TODO: implement it.
1107 template <class _Tp, class _Up = typename _Tp::value_type>
1109 
1110 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
1111 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
1112 
1113 template <class _Tp, class _Up = typename _Tp::value_type>
1115 
1116 // class template simd [simd.class]
1117 template <class _Tp>
1119 template <class _Tp, int _Np>
1121 
1122 // class template simd_mask [simd.mask.class]
1123 template <class _Tp>
1125 
1126 template <class _Tp, int _Np>
1128 
1129 // casts [simd.casts]
1130 template <class _Tp>
1132  template <class _Up, class _Abi>
1134 };
1135 
1136 template <class _Tp, class _NewAbi>
1137 struct __static_simd_cast_traits<simd<_Tp, _NewAbi>> {
1138  template <class _Up, class _Abi>
1139  static std::enable_if_t<simd<_Up, _Abi>::size() ==
1143 };
1144 
1145 template <class _Tp>
1147  template <class _Up, class _Abi>
1148  static std::enable_if_t<
1149  __is_non_narrowing_arithmetic_convertible<_Up, _Tp>(),
1152 };
1153 
1154 template <class _Tp, class _NewAbi>
1155 struct __simd_cast_traits<simd<_Tp, _NewAbi>> {
1156  template <class _Up, class _Abi>
1157  static std::enable_if_t<
1158  __is_non_narrowing_arithmetic_convertible<_Up, _Tp>() &&
1162 };
1163 
1164 template <class _Tp, class _Up, class _Abi>
1165 auto simd_cast(const simd<_Up, _Abi>& __v)
1166  -> decltype(__simd_cast_traits<_Tp>::__apply(__v)) {
1168 }
1169 
1170 template <class _Tp, class _Up, class _Abi>
1172  -> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) {
1174 }
1175 
1176 template <class _Tp, class _Abi>
1179 
1180 template <class _Tp, class _Abi>
1183 
1184 template <class _Tp, size_t _Np>
1186 
1187 template <class _Tp, size_t _Np>
1189 
1190 template <class _Tp, size_t _Np>
1192 
1193 template <class _Tp, size_t _Np>
1195 
1196 template <size_t... __sizes, class _Tp, class _Abi>
1197 tuple<simd<_Tp, abi_for_size_t<_Tp, __sizes>>...> split(const simd<_Tp, _Abi>&);
1198 
1199 template <size_t... __sizes, class _Tp, class _Abi>
1200 tuple<simd_mask<_Tp, abi_for_size_t<_Tp, __sizes>>...>
1202 
1203 template <class _SimdType, class _Abi>
1204 array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
1205  _SimdType::size()>
1207 
1208 template <class _SimdType, class _Abi>
1209 array<_SimdType, simd_size<typename _SimdType::value_type, _Abi>::value /
1210  _SimdType::size()>
1212 
1213 template <class _Tp, class... _Abis>
1216 
1217 template <class _Tp, class... _Abis>
1218 simd_mask<_Tp,
1221 
1222 // reductions [simd.mask.reductions]
1223 template <class _Tp, class _Abi>
1225 template <class _Tp, class _Abi>
1227 template <class _Tp, class _Abi>
1229 template <class _Tp, class _Abi>
1231 template <class _Tp, class _Abi>
1233 template <class _Tp, class _Abi>
1235 template <class _Tp, class _Abi>
1237 bool all_of(bool) noexcept;
1238 bool any_of(bool) noexcept;
1239 bool none_of(bool) noexcept;
1240 bool some_of(bool) noexcept;
1241 int popcount(bool) noexcept;
1244 
1245 // masked assignment [simd.whereexpr]
1246 template <class _MaskType, class _Tp>
1248 template <class _MaskType, class _Tp>
1249 class where_expression;
1250 
1251 // masked assignment [simd.mask.where]
1252 template <class _Tp, class _Abi>
1255 
1256 template <class _Tp, class _Abi>
1259  const simd<_Tp, _Abi>&) noexcept;
1260 
1261 template <class _Tp, class _Abi>
1263 where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
1265 
1266 template <class _Tp, class _Abi>
1268 where(const typename __nodeduce<simd_mask<_Tp, _Abi>>::type&,
1270 
1271 template <class _Tp>
1273 
1274 template <class _Tp>
1276 
1277 // reductions [simd.reductions]
1278 template <class _Tp, class _Abi, class _BinaryOp = std::plus<_Tp>>
1279 _Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp());
1280 
1281 template <class _MaskType, class _SimdType, class _BinaryOp>
1282 typename _SimdType::value_type
1284  typename _SimdType::value_type neutral_element, _BinaryOp binary_op);
1285 
1286 template <class _MaskType, class _SimdType>
1287 typename _SimdType::value_type
1289  plus<typename _SimdType::value_type> binary_op = {});
1290 
1291 template <class _MaskType, class _SimdType>
1292 typename _SimdType::value_type
1294  multiplies<typename _SimdType::value_type> binary_op);
1295 
1296 template <class _MaskType, class _SimdType>
1297 typename _SimdType::value_type
1299  bit_and<typename _SimdType::value_type> binary_op);
1300 
1301 template <class _MaskType, class _SimdType>
1302 typename _SimdType::value_type
1304  bit_or<typename _SimdType::value_type> binary_op);
1305 
1306 template <class _MaskType, class _SimdType>
1307 typename _SimdType::value_type
1309  bit_xor<typename _SimdType::value_type> binary_op);
1310 
1311 template <class _Tp, class _Abi>
1312 _Tp hmin(const simd<_Tp, _Abi>&);
1313 template <class _MaskType, class _SimdType>
1314 typename _SimdType::value_type
1316 template <class _Tp, class _Abi>
1317 _Tp hmax(const simd<_Tp, _Abi>&);
1318 template <class _MaskType, class _SimdType>
1319 typename _SimdType::value_type
1321 
1322 // algorithms [simd.alg]
1323 template <class _Tp, class _Abi>
1325 
1326 template <class _Tp, class _Abi>
1328 
1329 template <class _Tp, class _Abi>
1330 std::pair<simd<_Tp, _Abi>, simd<_Tp, _Abi>>
1332 
1333 template <class _Tp, class _Abi>
1335  const simd<_Tp, _Abi>&);
1336 
1337 // [simd.whereexpr]
1338 // TODO implement where expressions.
1339 template <class _MaskType, class _Tp>
1341 public:
1344  remove_const_t<_Tp>operator-() const&&;
1345  template <class _Up, class _Flags>
1346  void copy_to(_Up*, _Flags) const&&;
1347 };
1348 
1349 template <class _MaskType, class _Tp>
1350 class where_expression : public const_where_expression<_MaskType, _Tp> {
1351 public:
1354  template <class _Up>
1355  void operator=(_Up&&);
1356  template <class _Up>
1357  void operator+=(_Up&&);
1358  template <class _Up>
1359  void operator-=(_Up&&);
1360  template <class _Up>
1361  void operator*=(_Up&&);
1362  template <class _Up>
1363  void operator/=(_Up&&);
1364  template <class _Up>
1365  void operator%=(_Up&&);
1366  template <class _Up>
1367  void operator&=(_Up&&);
1368  template <class _Up>
1369  void operator|=(_Up&&);
1370  template <class _Up>
1371  void operator^=(_Up&&);
1372  template <class _Up>
1373  void operator<<=(_Up&&);
1374  template <class _Up>
1375  void operator>>=(_Up&&);
1376  void operator++();
1377  void operator++(int);
1378  void operator--();
1379  void operator--(int);
1380  template <class _Up, class _Flags>
1381  void copy_from(const _Up*, _Flags);
1382 };
1383 
1384 // [simd.class]
1385 // TODO: implement simd
1386 template <class _Tp, class _Abi>
1387 class simd {
1388 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1389  template <class, class> friend class simd;
1390 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1391 public:
1392  using value_type = _Tp;
1395  using abi_type = _Abi;
1396 
1397  simd() = default;
1398  simd(const simd&) = default;
1399  simd& operator=(const simd&) = default;
1400 
1401  static constexpr size_t size() noexcept {
1403  }
1404 
1405 private:
1407 
1408 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1409  // TODO Temp implementation to convert between esimd::simd and oneapi::simd.
1410  // Implement free conversion functions instead.
1411 public:
1412  // TODO Won't compile for non-VecExt, maybe _StorageType should be added to
1413  // all ABIs.
1415 
1416  // implicit conversion to storage type
1417  operator raw_storage_type() const { return __s_.__storage_; }
1418 
1419  // implicit conversion from storage type
1420  simd(const raw_storage_type &__raw_simd) { __s_.__storage_ = __raw_simd; }
1421 
1422 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1423 
1424 private:
1425  template <class _Up>
1426  static constexpr bool __can_broadcast() {
1427  return (std::is_arithmetic_v<_Up> &&
1428  __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) ||
1429  (!std::is_arithmetic_v<_Up> &&
1430  std::is_convertible_v<_Up, _Tp>) ||
1431  std::is_same_v<std::remove_const_t<_Up>, int> ||
1432  (std::is_same_v<std::remove_const_t<_Up>,
1433  unsigned int> &&
1434  std::is_unsigned_v<_Tp>);
1435  }
1436 
  // Returns true when every value produced by calling _Generator with
  // std::integral_constant<size_t, i> (for each i in __indicies) satisfies
  // __can_broadcast. The decltype in the return type removes this overload
  // from overload resolution when such a call is ill-formed.
  template <class _Generator, size_t... __indicies>
  static constexpr decltype(
      std::forward_as_tuple(std::declval<_Generator>()(
          std::integral_constant<size_t, __indicies>())...),
      bool())
  __can_generate(std::index_sequence<__indicies...>) {
    // Sums the per-index "cannot broadcast" flags as bool; the sum is false
    // only when none of them is set.
    return !__variadic_sum<bool>(
        !__can_broadcast<decltype(std::declval<_Generator>()(
            std::integral_constant<size_t, __indicies>()))>()...);
  }
1447 
1448  template <class _Generator>
1449  static bool __can_generate(...) {
1450  return false;
1451  }
1452 
1453  template <class _Generator, size_t... __indicies>
1454  void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) {
1455  int __not_used[]{((*this)[__indicies] =
1456  __g(std::integral_constant<size_t, __indicies>()),
1457  0)...};
1458  (void)__not_used;
1459  }
1460 
1461 public:
1462  // implicit type conversion constructor
1463 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1464  template <class _Up,
1465  class = std::enable_if_t<
1466  std::is_same_v<_Abi, __simd_abi<_StorageKind::_VecExt, size()>> &&
1467  __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()>>
1468  simd(const simd<_Up, _Abi>& __v) {
1469  __s_.__storage_ = __builtin_convertvector(__v.__s_.__storage_, raw_storage_type);
1470  }
1471 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1472  template <class _Up,
1473  class = std::enable_if_t<
1474  std::is_same_v<_Abi, simd_abi::fixed_size<size()>> &&
1475  __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()>>
1476  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __v) {
1477  for (size_t __i = 0; __i < size(); __i++) {
1478  (*this)[__i] = static_cast<_Tp>(__v[__i]);
1479  }
1480  }
1481 
1482  // implicit broadcast constructor
1483  template <class _Up,
1484  class = std::enable_if_t<__can_broadcast<_Up>()>>
1485  simd(_Up&& __rv) {
1486  auto __v = static_cast<_Tp>(__rv);
1487  for (size_t __i = 0; __i < size(); __i++) {
1488  (*this)[__i] = __v;
1489  }
1490  }
1491 
1492  // generator constructor
1493  template <class _Generator,
1494  int = std::enable_if_t<
1495  __can_generate<_Generator>(std::make_index_sequence<size()>()),
1496  int>()>
1497  explicit simd(_Generator&& __g) {
1498  __generator_init(std::forward<_Generator>(__g),
1499  std::make_index_sequence<size()>());
1500  }
1501 
1502  // load constructor
1503  template <
1504  class _Up, class _Flags,
1505  class = std::enable_if_t<__vectorizable<_Up>()>,
1506  class = std::enable_if_t<is_simd_flag_type<_Flags>::value>>
1507  simd(const _Up* __buffer, _Flags) {
1508  // TODO: optimize for overaligned flags
1509  for (size_t __i = 0; __i < size(); __i++) {
1510  (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
1511  }
1512  }
1513 
1514  // loads [simd.load]
1515  template <class _Up, class _Flags>
1516  std::enable_if_t<__vectorizable<_Up>() &&
1518  copy_from(const _Up* __buffer, _Flags) {
1519  *this = simd(__buffer, _Flags());
1520  }
1521 
1522  // stores [simd.store]
1523  template <class _Up, class _Flags>
1524  std::enable_if_t<__vectorizable<_Up>() &&
1526  copy_to(_Up* __buffer, _Flags) const {
1527  // TODO: optimize for overaligned flags
1528  for (size_t __i = 0; __i < size(); __i++) {
1529  __buffer[__i] = static_cast<_Up>((*this)[__i]);
1530  }
1531  }
1532 
1533  // scalar access [simd.subscr]
1534  reference operator[](size_t __i) { return reference(&__s_, __i); }
1535 
1536  value_type operator[](size_t __i) const { return __s_.__get(__i); }
1537 
1538  // unary operators [simd.unary]
1544  simd operator~() const;
1545  simd operator+() const;
1546  simd operator-() const;
1547 
1548  // binary operators [simd.binary]
1549  friend simd operator+(const simd&, const simd&);
1550  friend simd operator-(const simd&, const simd&);
1551  friend simd operator*(const simd&, const simd&);
1552  friend simd operator/(const simd&, const simd&);
1553  friend simd operator%(const simd&, const simd&);
1554  friend simd operator&(const simd&, const simd&);
1555  friend simd operator|(const simd&, const simd&);
1556  friend simd operator^(const simd&, const simd&);
1557  friend simd operator<<(const simd&, const simd&);
1558  friend simd operator>>(const simd&, const simd&);
1559  friend simd operator<<(const simd&, int);
1560  friend simd operator>>(const simd&, int);
1561 
1562  // compound assignment [simd.cassign]
1563  friend simd& operator+=(simd&, const simd&);
1564  friend simd& operator-=(simd&, const simd&);
1565  friend simd& operator*=(simd&, const simd&);
1566  friend simd& operator/=(simd&, const simd&);
1567  friend simd& operator%=(simd&, const simd&);
1568 
1569  friend simd& operator&=(simd&, const simd&);
1570  friend simd& operator|=(simd&, const simd&);
1571  friend simd& operator^=(simd&, const simd&);
1572  friend simd& operator<<=(simd&, const simd&);
1573  friend simd& operator>>=(simd&, const simd&);
1574  friend simd& operator<<=(simd&, int);
1575  friend simd& operator>>=(simd&, int);
1576 
1577  // compares [simd.comparison]
1578  friend mask_type operator==(const simd&, const simd&);
1579  friend mask_type operator!=(const simd&, const simd&);
1580  friend mask_type operator>=(const simd&, const simd&);
1581  friend mask_type operator<=(const simd&, const simd&);
1582  friend mask_type operator>(const simd&, const simd&);
1583  //friend mask_type operator<(const simd&, const simd&);
1584 };
1585 
1586 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
// Exposes the storage kind of an ABI tag. For a type that is not a
// __simd_abi specialization this is just std::false_type.
template <class _Abi>
struct __abi_storage_kind : public std::false_type {};

// For __simd_abi<_K, _Np> the bool `value` inherited from std::true_type is
// hidden by a _StorageKind-typed `value` equal to _K.
template <_StorageKind _K, int _Np>
struct __abi_storage_kind<__simd_abi<_K, _Np>> : public std::true_type {
  static constexpr _StorageKind value = _K;
};
1594 
1595 template <typename _Tp, class _Abi> struct __mask_element {
1597  "only clang vector-based ABI is supported for now.");
1598  using type =
1599  std::conditional_t<sizeof(_Tp) == 1, uint8_t,
1600  std::conditional_t<sizeof(_Tp) == 2, uint16_t,
1601  std::conditional_t<sizeof(_Tp) == 4, uint32_t,
1602  std::conditional_t<sizeof(_Tp) == 8, uint64_t, void>>>>;
1603 };
1604 
1605 // Represents a reference to a simd_mask object element.
1606 template <class _Tp, class _Abi>
1608 private:
1609  using _Vp = bool;
1610  using _simd_mask_element_type = typename __mask_element<_Tp, _Abi>::type;
1611 
1612 public:
1613  template <class, class>
1614  friend struct simd_mask;
1615 
1617  size_t __index_;
1618 
1620  : __ptr_(__ptr), __index_(__index) {}
1621 
1623 
1626 
1627  operator _Vp() const { return (_Vp)__ptr_->__get(__index_); }
1628 
1630  __ptr_->__set(__index_, (_Tp)__value);
1631  return *this;
1632  }
1633 
1635  return std::move(*this) = ((_Vp)__ptr_->__get(__index_)) & __value;
1636  }
1637 
1639  return std::move(*this) = ((_Vp)__ptr_->__get(__index_)) | __value;
1640  }
1641 
1643  return std::move(*this) = ((_Vp)__ptr_->__get(__index_)) ^ __value;
1644  }
1645 };
1646 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1647 
1648 // [simd.mask.class]
1649 template <class _Tp, class _Abi>
1650 // TODO: implement simd_mask
1651 class simd_mask {
1652 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1653  using element_type = typename __mask_element<_Tp, _Abi>::type;
1654 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1655 public:
1656  using value_type = bool;
1657 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1659 #else
1660  // TODO: this is a strawman implementation. Turn it into a proxy type.
1661  using reference = bool&;
1662 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1664  using abi_type = _Abi;
1665 
1666 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1667  static constexpr size_t size() noexcept { return simd_type::size(); }
1668 #else
1669  static constexpr size_t size() noexcept;
1670 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1671 
1672 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1673  const auto& data() const noexcept { return __s_.data(); }
1674 #endif
1675 
1676  simd_mask() = default;
1677 
1678  // broadcast constructor
1679 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1680  // TODO inefficient, use this's storage directly
1682  for (size_t __i = 0; __i < size(); __i++) {
1683  (*this)[__i] = __v;
1684  }
1685  }
1686 #else
1687  explicit simd_mask(value_type) noexcept;
1688 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1689 
1690  // implicit type conversion constructor
1691 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1692  template <class _Up>
1693  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __v) noexcept {
1694  copyElements(__v);
1695  }
1696 
1697  template <class _Up>
1699  copyElements(__v);
1700  }
1701 #else
1702  template <class _Up>
1703  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>&) noexcept;
1704 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1705 
1706  // load constructor
1707  template <class _Flags>
1708  simd_mask(const value_type*, _Flags);
1709 
1710  // loads [simd.mask.copy]
1711  template <class _Flags>
1712  void copy_from(const value_type*, _Flags);
1713  template <class _Flags>
1714  void copy_to(value_type*, _Flags) const;
1715 
1716  // scalar access [simd.mask.subscr]
1717 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1718  reference operator[](size_t __i) { return reference(&__s_, __i); }
1719  value_type operator[](size_t __i) const { return __s_.__get(__i) != 0; }
1720 #else
1721  reference operator[](size_t);
1722  value_type operator[](size_t) const;
1723 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1724 
1725  // unary operators [simd.mask.unary]
1727 
1728  // simd_mask binary operators [simd.mask.binary]
1729  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
1730  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
1731  friend simd_mask operator&(const simd_mask&, const simd_mask&)noexcept;
1732  friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept;
1733  friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept;
1734 
1735  // simd_mask compound assignment [simd.mask.cassign]
1736  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
1737  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
1738  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;
1739 
1740  // simd_mask compares [simd.mask.comparison]
1741  friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept;
1742  friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept;
1743 
1744 #ifdef ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1745 private:
1747 
1748  // TODO inefficient, use this's and __v's storage directly
1749  template <class _Up, class _UAbi>
1750  inline void copyElements(const simd_mask<_Up, _UAbi> & __v) noexcept {
1751  for (size_t __i = 0; __i < size(); __i++) {
1752  (*this)[__i] = static_cast<element_type>(__v[__i]);
1753  }
1754  }
1755 #endif // ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
1756 };
1757 
1758 #endif // _LIBCPP_STD_VER >= 17
1759 
1761 
1763 
1764 // Removed for ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD {
1765 //#endif /* _LIBCPP_EXPERIMENTAL_SIMD */
1766 // } Removed for ENABLE_SYCL_EXT_ONEAPI_INVOKE_SIMD
__simd_storage< _simd_mask_element_type, _Abi > * __ptr_
Definition: simd.hpp:1616
__simd_mask_reference operator&=(_Vp __value) &&
Definition: simd.hpp:1634
__simd_mask_reference & operator=(const __simd_mask_reference &)=delete
__simd_mask_reference(const __simd_mask_reference &)=default
__simd_mask_reference()=delete
__simd_mask_reference(__simd_storage< _Tp, _Abi > *__ptr, size_t __index)
Definition: simd.hpp:1619
__simd_mask_reference operator|=(_Vp __value) &&
Definition: simd.hpp:1638
__simd_mask_reference operator^=(_Vp __value) &&
Definition: simd.hpp:1642
__simd_mask_reference operator=(_Vp __value) &&
Definition: simd.hpp:1629
_Vp operator++(int) &&
Definition: simd.hpp:897
__simd_reference operator^=(_Vp __value) &&
Definition: simd.hpp:949
__simd_reference operator%=(_Vp __value) &&
Definition: simd.hpp:929
__simd_reference operator+=(_Vp __value) &&
Definition: simd.hpp:913
__simd_reference operator++() &&
Definition: simd.hpp:893
__simd_reference operator|=(_Vp __value) &&
Definition: simd.hpp:945
__simd_reference operator<<=(_Vp __value) &&
Definition: simd.hpp:937
__simd_reference & operator=(const __simd_reference &)=delete
__simd_reference operator*=(_Vp __value) &&
Definition: simd.hpp:921
__simd_reference operator-=(_Vp __value) &&
Definition: simd.hpp:917
_Vp operator--(int) &&
Definition: simd.hpp:907
__simd_reference()=delete
__simd_reference operator--() &&
Definition: simd.hpp:903
__simd_reference operator=(_Vp __value) &&
Definition: simd.hpp:888
__simd_reference operator>>=(_Vp __value) &&
Definition: simd.hpp:933
__simd_reference operator&=(_Vp __value) &&
Definition: simd.hpp:941
__simd_reference operator/=(_Vp __value) &&
Definition: simd.hpp:925
void __set(size_t __index, _Tp __val) noexcept
Definition: simd.hpp:752
const_where_expression(const const_where_expression &)=delete
const_where_expression & operator=(const const_where_expression &)=delete
void copy_to(_Up *, _Flags) const &&
remove_const_t< _Tp > operator-() const &&
_Abi abi_type
Definition: simd.hpp:1664
simd_mask()=default
simd_mask(value_type __v) noexcept
Definition: simd.hpp:1681
simd_mask(const value_type *, _Flags)
value_type operator[](size_t __i) const
Definition: simd.hpp:1719
static constexpr size_t size() noexcept
Definition: simd.hpp:1667
reference operator[](size_t __i)
Definition: simd.hpp:1718
simd_mask operator!() const noexcept
simd_mask(const simd_mask< _Up, abi_type > &__v) noexcept
Definition: simd.hpp:1698
simd_mask(const simd_mask< _Up, simd_abi::fixed_size< size()>> &__v) noexcept
Definition: simd.hpp:1693
void copy_from(const value_type *, _Flags)
bool value_type
Definition: simd.hpp:1656
const auto & data() const noexcept
Definition: simd.hpp:1673
void copy_to(value_type *, _Flags) const
Definition: simd.hpp:1387
friend simd operator<<(const simd &, int)
friend simd & operator>>=(simd &, const simd &)
simd & operator++()
friend simd operator>>(const simd &, const simd &)
friend simd operator>>(const simd &, int)
std::enable_if_t< __vectorizable< _Up >() &&is_simd_flag_type< _Flags >::value > copy_to(_Up *__buffer, _Flags) const
Definition: simd.hpp:1526
friend simd operator<<(const simd &, const simd &)
simd(_Generator &&__g)
Definition: simd.hpp:1497
friend simd & operator<<=(simd &, const simd &)
simd operator-() const
friend simd operator%(const simd &, const simd &)
friend simd operator+(const simd &, const simd &)
simd(const simd< _Up, _Abi > &__v)
Definition: simd.hpp:1468
mask_type operator!() const
static constexpr size_t size() noexcept
Definition: simd.hpp:1401
friend simd & operator*=(simd &, const simd &)
friend simd operator^(const simd &, const simd &)
simd(_Up &&__rv)
Definition: simd.hpp:1485
friend simd & operator%=(simd &, const simd &)
friend simd & operator^=(simd &, const simd &)
friend mask_type operator>=(const simd &, const simd &)
simd & operator=(const simd &)=default
_Abi abi_type
Definition: simd.hpp:1395
simd()=default
friend simd operator|(const simd &, const simd &)
friend mask_type operator<=(const simd &, const simd &)
friend mask_type operator>(const simd &, const simd &)
simd(const simd &)=default
friend simd & operator+=(simd &, const simd &)
friend simd & operator|=(simd &, const simd &)
friend simd operator*(const simd &, const simd &)
friend mask_type operator==(const simd &, const simd &)
friend mask_type operator!=(const simd &, const simd &)
_Tp value_type
Definition: simd.hpp:1392
friend simd & operator/=(simd &, const simd &)
reference operator[](size_t __i)
Definition: simd.hpp:1534
simd & operator--()
simd(const simd< _Up, simd_abi::fixed_size< size()>> &__v)
Definition: simd.hpp:1476
friend simd & operator-=(simd &, const simd &)
friend simd & operator>>=(simd &, int)
friend simd & operator<<=(simd &, int)
friend simd & operator&=(simd &, const simd &)
value_type operator[](size_t __i) const
Definition: simd.hpp:1536
simd(const raw_storage_type &__raw_simd)
Definition: simd.hpp:1420
simd operator~() const
friend simd operator-(const simd &, const simd &)
simd operator--(int)
typename __simd_storage< _Tp, _Abi >::_StorageType raw_storage_type
Definition: simd.hpp:1414
simd operator++(int)
friend simd operator&(const simd &, const simd &)
friend simd operator/(const simd &, const simd &)
std::enable_if_t< __vectorizable< _Up >() &&is_simd_flag_type< _Flags >::value > copy_from(const _Up *__buffer, _Flags)
Definition: simd.hpp:1518
simd operator+() const
where_expression(const where_expression &)=delete
void operator--(int)
void operator&=(_Up &&)
void operator/=(_Up &&)
void operator*=(_Up &&)
void operator>>=(_Up &&)
void copy_from(const _Up *, _Flags)
void operator^=(_Up &&)
void operator|=(_Up &&)
void operator<<=(_Up &&)
void operator-=(_Up &&)
void operator%=(_Up &&)
void operator+=(_Up &&)
void operator=(_Up &&)
void operator++(int)
where_expression & operator=(const where_expression &)=delete
detail::simd_mask_type< N > simd_mask
Represents a simd mask of size N.
Definition: simd.hpp:199
sycl::ext::oneapi::experimental::annotated_ref< T, property_list_t > reference
std::experimental::simd< T, simd_abi::native_fixed_size< T, N > > simd
T & operator[](std::ptrdiff_t idx) const noexcept
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
const void value_type
Definition: multi_ptr.hpp:457
_StorageKind
Definition: simd.hpp:711
#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI
Definition: simd.hpp:675
bool any_of(const simd_mask< _Tp, _Abi > &) noexcept
native_simd< _Tp > to_native(const fixed_size_simd< _Tp, _Np > &) noexcept
constexpr element_aligned_tag element_aligned
Definition: simd.hpp:1038
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI
Definition: simd.hpp:676
constexpr size_t __ceil_pow_of_2(size_t __val)
Definition: simd.hpp:764
__simd_abi< _StorageKind::_Array, _Np > fixed_size
Definition: simd.hpp:1008
int popcount(const simd_mask< _Tp, _Abi > &) noexcept
fixed_size_simd< _Tp, simd_size< _Tp, _Abi >::value > to_fixed_size(const simd< _Tp, _Abi > &) noexcept
int find_first_set(const simd_mask< _Tp, _Abi > &)
#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD
Definition: simd.hpp:674
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
int find_last_set(const simd_mask< _Tp, _Abi > &)
#define _LIBCPP_PUSH_MACROS
Definition: simd.hpp:678
constexpr vector_aligned_tag vector_aligned
Definition: simd.hpp:1039
simd< _Tp, _Abi >(min)(const simd< _Tp
_Tp hmax(const simd< _Tp, _Abi > &)
#define _LIBCPP_SPECIALIZE_VEC_EXT_32(_TYPE)
Definition: simd.hpp:783
tuple< simd< _Tp, abi_for_size_t< _Tp, __sizes > >... > split(const simd< _Tp, _Abi > &)
constexpr bool __vectorizable()
Definition: simd.hpp:997
constexpr size_t max_fixed_size
Definition: simd.hpp:1011
typename abi_for_size< _Tp, _Np >::type abi_for_size_t
Definition: simd.hpp:1092
simd< _Tp > to_compatible(const fixed_size_simd< _Tp, _Np > &) noexcept
constexpr bool is_simd_flag_type_v
Definition: simd.hpp:1086
constexpr _Tp __variadic_sum()
Definition: simd.hpp:982
bool none_of(const simd_mask< _Tp, _Abi > &) noexcept
where_expression< simd_mask< _Tp, _Abi >, simd< _Tp, _Abi > > where(const typename simd< _Tp, _Abi >::mask_type &, simd< _Tp, _Abi > &) noexcept
bool some_of(const simd_mask< _Tp, _Abi > &) noexcept
_Tp hmin(const simd< _Tp, _Abi > &)
constexpr bool is_abi_tag_v
Definition: simd.hpp:1080
#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD
Definition: simd.hpp:673
auto static_simd_cast(const simd< _Up, _Abi > &__v) -> decltype(__static_simd_cast_traits< _Tp >::__apply(__v))
Definition: simd.hpp:1171
constexpr size_t __floor_pow_of_2(size_t __val)
Definition: simd.hpp:759
std::pair< simd< _Tp, _Abi >, simd< _Tp, _Abi > > minmax(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
simd< _Tp, abi_for_size_t< _Tp, __variadic_sum(simd_size< _Tp, _Abis >::value...)> > concat(const simd< _Tp, _Abis > &...)
#define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES
Definition: simd.hpp:680
constexpr size_t simd_size_v
Definition: simd.hpp:1111
constexpr bool is_simd_v
Definition: simd.hpp:1082
constexpr std::enable_if_t< std::is_arithmetic_v< _To > &&std::is_arithmetic_v< _From >, bool > __is_non_narrowing_arithmetic_convertible()
Definition: simd.hpp:969
constexpr decltype(_To{std::declval< _From >()}, true) __is_non_narrowing_convertible_impl(_From)
Definition: simd.hpp:956
constexpr size_t memory_alignment_v
Definition: simd.hpp:1114
constexpr overaligned_tag< _Np > overaligned
Definition: simd.hpp:1041
constexpr bool is_simd_mask_v
Definition: simd.hpp:1084
auto simd_cast(const simd< _Up, _Abi > &__v) -> decltype(__simd_cast_traits< _Tp >::__apply(__v))
Definition: simd.hpp:1165
simd< _Tp, _Abi > clamp(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &)
#define _LIBCPP_POP_MACROS
Definition: simd.hpp:679
std::conditional_t< sizeof(_Tp)==1, uint8_t, std::conditional_t< sizeof(_Tp)==2, uint16_t, std::conditional_t< sizeof(_Tp)==4, uint32_t, std::conditional_t< sizeof(_Tp)==8, uint64_t, void > >> > type
Definition: simd.hpp:1602
_Tp type
Definition: simd.hpp:993
static std::enable_if_t< __is_non_narrowing_arithmetic_convertible< _Up, _Tp >() &&simd< _Up, _Abi >::size()==simd< _Tp, _NewAbi >::size(), simd< _Tp, _NewAbi > > __apply(const simd< _Up, _Abi > &__v)
static std::enable_if_t< __is_non_narrowing_arithmetic_convertible< _Up, _Tp >(), simd< _Tp, _Abi > > __apply(const simd< _Up, _Abi > &__v)
static std::enable_if_t< simd< _Up, _Abi >::size()==simd< _Tp, _NewAbi >::size(), simd< _Tp, _NewAbi > > __apply(const simd< _Up, _Abi > &__v)
static simd< _Tp, _Abi > __apply(const simd< _Up, _Abi > &__v)
simd_abi::fixed_size< _Np > type
Definition: simd.hpp:1089