DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement experimental Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
19 
20 namespace sycl {
21 inline namespace _V1 {
22 namespace ext::intel::experimental::esimd {
23 
26 
37 template <typename T0, typename T1, int SZ, typename U,
38  class Sat = __ESIMD_NS::saturation_off_tag>
39 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
40  std::is_integral<T1>::value &&
41  std::is_integral<U>::value,
42  __ESIMD_NS::simd<T0, SZ>>
43 shl(__ESIMD_NS::simd<T1, SZ> src0, U src1, Sat sat = {}) {
44  using ComputationTy =
45  __ESIMD_DNS::computation_type_t<decltype(src0), int32_t>;
46  ComputationTy Src0 = src0;
47  ComputationTy Src1 = src1;
48 
49  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_on_tag>) {
50  if constexpr (std::is_unsigned<T0>::value) {
51  if constexpr (std::is_unsigned<
52  typename ComputationTy::element_type>::value)
53  return __esimd_uushl_sat<T0, typename ComputationTy::element_type, SZ>(
54  Src0.data(), Src1.data());
55  else
56  return __esimd_usshl_sat<T0, typename ComputationTy::element_type, SZ>(
57  Src0.data(), Src1.data());
58  } else {
59  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
60  return __esimd_sushl_sat<T0, typename ComputationTy::element_type, SZ>(
61  Src0.data(), Src1.data());
62  else
63  return __esimd_ssshl_sat<T0, typename ComputationTy::element_type, SZ>(
64  Src0.data(), Src1.data());
65  }
66  } else {
67  if constexpr (std::is_unsigned<T0>::value) {
68  if constexpr (std::is_unsigned<
69  typename ComputationTy::element_type>::value)
70  return __esimd_uushl<T0, typename ComputationTy::element_type, SZ>(
71  Src0.data(), Src1.data());
72  else
73  return __esimd_usshl<T0, typename ComputationTy::element_type, SZ>(
74  Src0.data(), Src1.data());
75  } else {
76  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
77  return __esimd_sushl<T0, typename ComputationTy::element_type, SZ>(
78  Src0.data(), Src1.data());
79  else
80  return __esimd_ssshl<T0, typename ComputationTy::element_type, SZ>(
81  Src0.data(), Src1.data());
82  }
83  }
84 }
85 
95 template <typename T0, typename T1, typename T2,
96  class Sat = __ESIMD_NS::saturation_off_tag>
97 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
98  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
99  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
100  std::is_integral<T0>::value &&
101  std::is_integral<T1>::value &&
102  std::is_integral<T2>::value,
103  std::remove_const_t<T0>>
104 shl(T1 src0, T2 src1, Sat sat = {}) {
105  __ESIMD_NS::simd<T1, 1> Src0 = src0;
106  __ESIMD_NS::simd<T0, 1> Result =
107  esimd::shl<T0, T1, 1, T2, Sat>(Src0, src1, sat);
108  return Result[0];
109 }
110 
121 template <typename T0, typename T1, int SZ, typename U,
122  class Sat = __ESIMD_NS::saturation_off_tag>
123 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
124  std::is_integral<T1>::value &&
125  std::is_integral<U>::value,
126  __ESIMD_NS::simd<T0, SZ>>
127 lsr(__ESIMD_NS::simd<T1, SZ> src0, U src1, Sat sat = {}) {
128  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
129  typedef typename std::make_unsigned<IntermedTy>::type ComputationTy;
130  __ESIMD_NS::simd<ComputationTy, SZ> Src0 = src0;
131  __ESIMD_NS::simd<ComputationTy, SZ> Src1 = src1;
132  // TODO H/W supports saturation with this op - map to more efficient version.
133  __ESIMD_NS::simd<ComputationTy, SZ> Result = Src0.data() >> Src1.data();
134 
135  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
136  return Result;
137  else
138  return __ESIMD_NS::saturate<T0>(Result);
139 }
140 
151 template <typename T0, typename T1, typename T2,
152  class Sat = __ESIMD_NS::saturation_off_tag>
153 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
154  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
155  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
156  std::is_integral<T0>::value &&
157  std::is_integral<T1>::value &&
158  std::is_integral<T2>::value,
159  std::remove_const_t<T0>>
160 lsr(T1 src0, T2 src1, Sat sat = {}) {
161  __ESIMD_NS::simd<T1, 1> Src0 = src0;
162  __ESIMD_NS::simd<T0, 1> Result =
163  esimd::lsr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
164 
165  return Result[0];
166 }
167 
178 template <typename T0, typename T1, int SZ, typename U,
179  class Sat = __ESIMD_NS::saturation_off_tag>
180 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
181  std::is_integral<T1>::value &&
182  std::is_integral<U>::value,
183  __ESIMD_NS::simd<T0, SZ>>
184 asr(__ESIMD_NS::simd<T1, SZ> src0, U src1, Sat sat = {}) {
185  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
186  typedef typename std::make_signed<IntermedTy>::type ComputationTy;
187  __ESIMD_NS::simd<ComputationTy, SZ> Src0 = src0;
188  // TODO H/W supports saturation with this op - map to more efficient version.
189  __ESIMD_NS::simd<ComputationTy, SZ> Result = Src0 >> src1;
190 
191  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
192  return Result;
193  else
194  return __ESIMD_NS::saturate<T0>(Result);
195 }
196 
207 template <typename T0, typename T1, typename T2,
208  class Sat = __ESIMD_NS::saturation_off_tag>
209 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
210  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
211  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
212  std::is_integral<T0>::value &&
213  std::is_integral<T1>::value &&
214  std::is_integral<T2>::value,
215  std::remove_const_t<T0>>
216 asr(T1 src0, T2 src1, Sat sat = {}) {
217  __ESIMD_NS::simd<T1, 1> Src0 = src0;
218  __ESIMD_NS::simd<T0, 1> Result =
219  esimd::asr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
220  return Result[0];
221 }
222 
233 template <typename T0, typename T1, int SZ, typename U,
234  class Sat = __ESIMD_NS::saturation_off_tag>
235 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
236  std::is_integral<T1>::value &&
237  std::is_integral<U>::value,
238  __ESIMD_NS::simd<T0, SZ>>
239 shr(__ESIMD_NS::simd<T1, SZ> src0, U src1, Sat sat = {}) {
240  if constexpr (std::is_unsigned<T1>::value) {
241  return esimd::lsr<T0, T1, SZ, U, Sat>(src0, src1, sat);
242  } else {
243  return esimd::asr<T0, T1, SZ, U, Sat>(src0, src1, sat);
244  }
245 }
246 
256 template <typename T0, typename T1, typename T2,
257  class Sat = __ESIMD_NS::saturation_off_tag>
258 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
259  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
260  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
261  std::is_integral<T0>::value &&
262  std::is_integral<T1>::value &&
263  std::is_integral<T2>::value,
264  std::remove_const_t<T0>>
265 shr(T1 src0, T2 src1, Sat sat = {}) {
266  __ESIMD_NS::simd<T1, 1> Src0 = src0;
267  __ESIMD_NS::simd<T0, 1> Result =
268  esimd::shr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
269  return Result[0];
270 }
271 
280 template <typename T0, typename T1, int SZ>
281 __ESIMD_API std::enable_if_t<
282  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
283  int64_t, uint64_t>() &&
284  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
285  int64_t, uint64_t>(),
286  __ESIMD_NS::simd<T0, SZ>>
287 rol(__ESIMD_NS::simd<T1, SZ> src0, __ESIMD_NS::simd<T1, SZ> src1) {
288  return __esimd_rol<T0, T1, SZ>(src0.data(), src1.data());
289 }
290 
299 template <typename T0, typename T1, int SZ, typename U>
300 __ESIMD_API std::enable_if_t<
301  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
302  int64_t, uint64_t>() &&
303  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
304  int64_t, uint64_t>() &&
305  __ESIMD_NS::detail::is_type<U, int16_t, uint16_t, int32_t, uint32_t,
306  int64_t, uint64_t>(),
307  __ESIMD_NS::simd<T0, SZ>>
308 rol(__ESIMD_NS::simd<T1, SZ> src0, U src1) {
309  __ESIMD_NS::simd<T1, SZ> Src1 = src1;
310  return esimd::rol<T0>(src0, Src1);
311 }
312 
320 template <typename T0, typename T1, typename T2>
321 __ESIMD_API std::enable_if_t<
322  __ESIMD_DNS::is_esimd_scalar<T0>::value &&
323  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
324  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
325  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
326  int64_t, uint64_t>() &&
327  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
328  int64_t, uint64_t>() &&
329  __ESIMD_NS::detail::is_type<T2, int16_t, uint16_t, int32_t, uint32_t,
330  int64_t, uint64_t>(),
331  std::remove_const_t<T0>>
332 rol(T1 src0, T2 src1) {
333  __ESIMD_NS::simd<T1, 1> Src0 = src0;
334  __ESIMD_NS::simd<T0, 1> Result = esimd::rol<T0, T1, 1, T2>(Src0, src1);
335  return Result[0];
336 }
337 
346 template <typename T0, typename T1, int SZ>
347 __ESIMD_API std::enable_if_t<
348  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
349  int64_t, uint64_t>() &&
350  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
351  int64_t, uint64_t>(),
352  __ESIMD_NS::simd<T0, SZ>>
353 ror(__ESIMD_NS::simd<T1, SZ> src0, __ESIMD_NS::simd<T1, SZ> src1) {
354  return __esimd_ror<T0, T1, SZ>(src0.data(), src1.data());
355 }
356 
365 template <typename T0, typename T1, int SZ, typename U>
366 __ESIMD_API std::enable_if_t<
367  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
368  int64_t, uint64_t>() &&
369  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
370  int64_t, uint64_t>() &&
371  __ESIMD_NS::detail::is_type<U, int16_t, uint16_t, int32_t, uint32_t,
372  int64_t, uint64_t>(),
373  __ESIMD_NS::simd<T0, SZ>>
374 ror(__ESIMD_NS::simd<T1, SZ> src0, U src1) {
375  __ESIMD_NS::simd<T1, SZ> Src1 = src1;
376  return esimd::ror<T0>(src0, Src1);
377 }
378 
386 template <typename T0, typename T1, typename T2>
387 __ESIMD_API std::enable_if_t<
388  __ESIMD_DNS::is_esimd_scalar<T0>::value &&
389  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
390  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
391  __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
392  int64_t, uint64_t>() &&
393  __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
394  int64_t, uint64_t>() &&
395  __ESIMD_NS::detail::is_type<T2, int16_t, uint16_t, int32_t, uint32_t,
396  int64_t, uint64_t>(),
397  std::remove_const_t<T0>>
398 ror(T1 src0, T2 src1) {
399  __ESIMD_NS::simd<T1, 1> Src0 = src0;
400  __ESIMD_NS::simd<T0, 1> Result = esimd::ror<T0, T1, 1, T2>(Src0, src1);
401  return Result[0];
402 }
403 
405 
408 
414 template <typename T, typename T0, typename T1, int N>
415 __ESIMD_API __ESIMD_NS::simd<T, N> imul_impl(__ESIMD_NS::simd<T, N> &rmd,
416  __ESIMD_NS::simd<T0, N> src0,
417  __ESIMD_NS::simd<T1, N> src1) {
418  static_assert(__ESIMD_DNS::is_dword_type<T>::value &&
419  __ESIMD_DNS::is_dword_type<T0>::value &&
420  __ESIMD_DNS::is_dword_type<T1>::value,
421  "expected 32-bit integer vector operands.");
422  using Comp32T = __ESIMD_DNS::computation_type_t<T0, T1>;
423  auto Src0 = src0.template bit_cast_view<Comp32T>();
424  auto Src1 = src1.template bit_cast_view<Comp32T>();
425 
426  // Compute the result using 64-bit multiplication operation.
427  using Comp64T =
428  std::conditional_t<std::is_signed_v<Comp32T>, int64_t, uint64_t>;
429  __ESIMD_NS::simd<Comp64T, N> Product64 = Src0;
430  Product64 *= Src1;
431 
432  // Split the 32-bit high and low parts to return them from this function.
433  auto Product32 = Product64.template bit_cast_view<T>();
434  if constexpr (N == 1) {
435  rmd = Product32[0];
436  return Product32[1];
437  } else {
438  rmd = Product32.template select<N, 2>(0);
439  return Product32.template select<N, 2>(1);
440  }
441 }
442 
447 template <typename T, typename T0, typename T1, int N>
448 __ESIMD_API __ESIMD_NS::simd<T, N> imul(__ESIMD_NS::simd<T, N> &rmd,
449  __ESIMD_NS::simd<T0, N> src0,
450  __ESIMD_NS::simd<T1, N> src1) {
451  return imul_impl<T, T0, T1, N>(rmd, src0, src1);
452 }
453 
459 template <typename T, typename T0, typename T1, int N>
460 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T1>::value,
461  __ESIMD_NS::simd<T, N>>
462 imul(__ESIMD_NS::simd<T, N> &rmd, __ESIMD_NS::simd<T0, N> src0, T1 src1) {
463  __ESIMD_NS::simd<T1, N> Src1V = src1;
464  return esimd::imul_impl<T, T0, T1, N>(rmd, src0, Src1V);
465 }
466 
472 template <typename T, typename T0, typename T1, int N>
473 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value,
474  __ESIMD_NS::simd<T, N>>
475 imul(__ESIMD_NS::simd<T, N> &rmd, T0 src0, __ESIMD_NS::simd<T1, N> src1) {
476  __ESIMD_NS::simd<T0, N> Src0V = src0;
477  return esimd::imul_impl<T, T0, T1, N>(rmd, Src0V, src1);
478 }
479 
484 template <typename T, typename T0, typename T1>
485 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T>::value &&
486  __ESIMD_DNS::is_dword_type<T0>::value &&
487  __ESIMD_DNS::is_dword_type<T1>::value,
488  T>
489 imul(T &rmd, T0 src0, T1 src1) {
490  __ESIMD_NS::simd<T, 1> RmdV = rmd;
491  __ESIMD_NS::simd<T0, 1> Src0V = src0;
492  __ESIMD_NS::simd<T1, 1> Src1V = src1;
493  __ESIMD_NS::simd<T, 1> Res =
494  esimd::imul_impl<T, T0, T1, 1>(RmdV, Src0V, Src1V);
495  rmd = RmdV[0];
496  return Res[0];
497 }
498 
499 template <int N>
501  "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
502 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> addc(
503  __ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
504  __ESIMD_NS::simd<uint32_t, N> src1) {
505  return __ESIMD_NS::addc(carry, src0, src1);
506 }
507 
508 template <int N>
510  "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
511 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> addc(
512  __ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
513  uint32_t src1) {
514  return __ESIMD_NS::addc(carry, src0, src1);
515 }
516 
517 template <int N>
519  "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
520 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> addc(
521  __ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
522  __ESIMD_NS::simd<uint32_t, N> src1) {
523  return __ESIMD_NS::addc(carry, src0, src1);
524 }
525 
527  "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
528 __ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
529  return __ESIMD_NS::addc(carry, src0, src1);
530 }
531 
532 template <int N>
534  "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
535 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> subb(
536  __ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
537  __ESIMD_NS::simd<uint32_t, N> src1) {
538  return __ESIMD_NS::subb(borrow, src0, src1);
539 }
540 
541 template <int N>
543  "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
544 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> subb(
545  __ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
546  uint32_t src1) {
547  return __ESIMD_NS::subb(borrow, src0, src1);
548 }
549 
550 template <int N>
552  "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
553 __ESIMD_API __ESIMD_NS::simd<uint32_t, N> subb(
554  __ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
555  __ESIMD_NS::simd<uint32_t, N> src1) {
556  return __ESIMD_NS::subb(borrow, src0, src1);
557 }
558 
560  "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
561 __ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
562  return __ESIMD_NS::subb(borrow, src0, src1);
563 }
564 
572 template <typename T, int SZ, typename U>
573 __SYCL_DEPRECATED("Use: src0 / src1;")
574 __ESIMD_API
575  std::enable_if_t<std::is_integral<T>::value && std::is_integral<U>::value,
576  __ESIMD_NS::simd<T, SZ>> quot(__ESIMD_NS::simd<T, SZ> src0,
577  U src1) {
578  return src0 / src1;
579 }
580 
587 template <typename T0, typename T1>
588 __SYCL_DEPRECATED("Use: src0 / src1;")
589 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
590  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
591  std::is_integral<T0>::value &&
592  std::is_integral<T1>::value,
593  std::remove_const_t<T0>> quot(T0 src0, T1 src1) {
594  return src0 / src1;
595 }
596 
604 template <typename T, int SZ, typename U>
605 __SYCL_DEPRECATED("Use: src0 % src1;")
606 __ESIMD_API
607  std::enable_if_t<std::is_integral<T>::value && std::is_integral<U>::value,
608  __ESIMD_NS::simd<T, SZ>> mod(__ESIMD_NS::simd<T, SZ> src0,
609  U src1) {
610  return src0 % src1;
611 }
612 
619 template <typename T0, typename T1>
620 __SYCL_DEPRECATED("Use: src0 % src1;")
621 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
622  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
623  std::is_integral<T0>::value &&
624  std::is_integral<T1>::value,
625  std::remove_const_t<T0>> mod(T0 src0, T1 src1) {
626  return src0 % src1;
627 }
628 
638 template <typename T, int SZ, typename U>
639 __SYCL_DEPRECATED("Use: T res = src0 / src1; T remainder = src0 % src1;")
640 __ESIMD_API std::enable_if_t<
641  std::is_integral<T>::value && std::is_integral<U>::value,
642  __ESIMD_NS::simd<T, SZ>> div(__ESIMD_NS::simd<T, SZ> &remainder,
643  __ESIMD_NS::simd<T, SZ> src0, U src1) {
644  remainder = src0 % src1;
645  return src0 / src1;
646 }
647 
657 template <typename T, int SZ, typename U>
658 __SYCL_DEPRECATED("Use: T res = src0 / src1; T remainder = src0 % src1;")
659 __ESIMD_API std::enable_if_t<
660  std::is_integral<T>::value && std::is_integral<U>::value &&
661  __ESIMD_DNS::is_esimd_scalar<U>::value,
662  __ESIMD_NS::simd<T, SZ>> div(__ESIMD_NS::simd<T, SZ> &remainder, U src0,
663  __ESIMD_NS::simd<T, SZ> src1) {
664  remainder = src0 % src1;
665  return src0 / src1;
666 }
667 
677 template <typename RT, typename T0, typename T1>
678 __SYCL_DEPRECATED("Use: T res = src0 / src1; T remainder = src0 % src1;")
679 __ESIMD_API std::enable_if_t<
680  __ESIMD_DNS::is_esimd_scalar<RT>::value &&
681  __ESIMD_DNS::is_esimd_scalar<T0>::value &&
682  __ESIMD_DNS::is_esimd_scalar<T1>::value,
683  std::remove_const_t<RT>> div(__ESIMD_NS::simd<std::remove_const_t<RT>, 1>
684  &remainder,
685  T0 src0, T1 src1) {
686  remainder[0] = src0 % src1;
687  return src0 / src1;
688 }
689 
690 // Dot product builtins
691 #if defined(ESIMD_GEN7_5) || defined(ESIMD_GEN8) || defined(ESIMD_GEN8_5) || \
692  defined(ESIMD_GEN9) || defined(ESIMD_GEN9_5)
693 
704 template <typename T0, typename T1, int SZ, typename U,
705  class Sat = __ESIMD_NS::saturation_off_tag>
706 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
707 __ESIMD_API __ESIMD_NS::simd<T0, SZ> dp2(__ESIMD_NS::simd<T1, SZ> src0, U src1,
708  Sat sat = {}) {
709  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
710  __ESIMD_NS::simd<float, SZ> Src0 = src0;
711  __ESIMD_NS::simd<float, SZ> Src1 = src1;
712  __ESIMD_NS::simd<float, SZ> Result = __esimd_dp2(Src0.data(), Src1.data());
713  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
714  return Result;
715  else
716  return __ESIMD_NS::saturate<T0>(Result);
717 }
718 
729 template <typename T0, typename T1, int SZ, typename U,
730  class Sat = __ESIMD_NS::saturation_off_tag>
731 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
732 __ESIMD_API __ESIMD_NS::simd<T0, SZ> dp3(__ESIMD_NS::simd<T1, SZ> src0, U src1,
733  Sat sat = {}) {
734  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
735  __ESIMD_NS::simd<float, SZ> Src0 = src0;
736  __ESIMD_NS::simd<float, SZ> Src1 = src1;
737  __ESIMD_NS::simd<float, SZ> Result = __esimd_dp3(Src0.data(), Src1.data());
738  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
739  return Result;
740  else
741  return __ESIMD_NS::saturate<T0>(Result);
742 }
743 
754 template <typename T0, typename T1, int SZ, typename U,
755  class Sat = __ESIMD_NS::saturation_off_tag>
756 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
757 __ESIMD_API __ESIMD_NS::simd<T0, SZ> dp4(__ESIMD_NS::simd<T1, SZ> src0, U src1,
758  Sat sat = {}) {
759  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
760  __ESIMD_NS::simd<float, SZ> Src0 = src0;
761  __ESIMD_NS::simd<float, SZ> Src1 = src1;
762  __ESIMD_NS::simd<float, SZ> Result = __esimd_dp4(Src0.data(), Src1.data());
763  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
764  return Result;
765  else
766  return __ESIMD_NS::saturate<T0>(Result);
767 }
768 
779 template <typename T0, typename T1, typename U, int SZ,
780  class Sat = __ESIMD_NS::saturation_off_tag>
781 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
782 __ESIMD_API __ESIMD_NS::simd<T0, SZ> dph(__ESIMD_NS::simd<T1, SZ> src0, U src1,
783  Sat sat = {}) {
784  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
785  __ESIMD_NS::simd<float, SZ> Src0 = src0;
786  __ESIMD_NS::simd<float, SZ> Src1 = src1;
787  __ESIMD_NS::simd<float, SZ> Result = __esimd_dph(Src0.data(), Src1.data());
788  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
789  return Result;
790  else
791  return __ESIMD_NS::saturate<T0>(Result);
792 }
793 
805 template <typename RT, typename T1, typename T2, int SZ,
806  class Sat = __ESIMD_NS::saturation_off_tag>
807 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
808 __ESIMD_API __ESIMD_NS::simd<RT, SZ> line(__ESIMD_NS::simd<T1, 4> src0,
809  __ESIMD_NS::simd<T2, SZ> src1,
810  Sat sat = {}) {
811  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
812 
813  __ESIMD_NS::simd<float, 4> Src0 = src0;
814  __ESIMD_NS::simd<float, SZ> Src1 = src1;
815  __ESIMD_NS::simd<float, SZ> Result = __esimd_line(Src0.data(), Src1.data());
816 
817  __ESIMD_NS::simd<RT, SZ> Result;
818  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
819  return Result;
820  else
821  return __ESIMD_NS::saturate<RT>(Result);
822 }
823 
835 template <typename RT, typename T, int SZ,
836  class Sat = __ESIMD_NS::saturation_off_tag>
837 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
838 __ESIMD_API __ESIMD_NS::simd<RT, SZ> line(float P, float Q,
839  __ESIMD_NS::simd<T, SZ> src1,
840  Sat sat = {}) {
841  __ESIMD_NS::simd<float, 4> Src0 = P;
842  Src0(3) = Q;
843  return esimd::line<RT>(Src0, src1, sat);
844 }
845 
846 #else
847 // The old implementation is to generate vISA IRs for dp2/dp3/dp4/dph/line.
848 // Now We change to use direct mul/add, and hope to generate mad instructions
849 // at the end, to still get the performance as good as HW solution.
850 // We rely on "pragma unroll" to get better code.
851 // The only input and return types for these APIs are floats.
852 // In order to be able to use the old emu code, we keep the template argument
853 // for the type, although the type "T" can only be float.
854 // We use std::enable_if to force the float type only.
855 // If the gen is not specified we warn the programmer that they are potentially
856 // using a less efficient implementation if not on GEN10 or above.
857 
868 template <typename T0, typename T1, int SZ, typename U,
869  class Sat = __ESIMD_NS::saturation_off_tag>
870 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
871 __ESIMD_API std::enable_if_t<
872  __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
873  std::is_floating_point<T1>::value &&
874  __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
875  std::is_floating_point<U>::value,
876  __ESIMD_NS::simd<T0, SZ>> dp2(__ESIMD_NS::simd<T1, SZ> src0, U src1,
877  Sat sat = {}) {
878  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
879 
880  __ESIMD_NS::simd<float, SZ> Src1 = src1;
881  __ESIMD_NS::simd<float, SZ> Result;
882 #pragma unroll
883  for (int i = 0; i < SZ; i += 4) {
884  Result.select<4, 1>(i) = src0[i] * Src1[i] + src0[i + 1] * Src1[i + 1];
885  }
886  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
887  return Result;
888  else
889  return __ESIMD_NS::saturate<T1>(Result);
890 }
891 
902 template <typename T0, typename T1, int SZ, typename U,
903  class Sat = __ESIMD_NS::saturation_off_tag>
904 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
905 __ESIMD_API std::enable_if_t<
906  __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
907  std::is_floating_point<T1>::value &&
908  __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
909  std::is_floating_point<U>::value,
910  __ESIMD_NS::simd<T0, SZ>> dp3(__ESIMD_NS::simd<T1, SZ> src0, U src1,
911  Sat sat = {}) {
912  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
913 
914  __ESIMD_NS::simd<float, SZ> Src1 = src1;
915  __ESIMD_NS::simd<float, SZ> Result;
916 #pragma unroll
917  for (int i = 0; i < SZ; i += 4) {
918  Result.select<4, 1>(i) = src0[i] * Src1[i] + src0[i + 1] * Src1[i + 1] +
919  src0[i + 2] * Src1[i + 2];
920  }
921  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
922  return Result;
923  else
924  return __ESIMD_NS::saturate<T1>(Result);
925 }
926 
937 template <typename T0, typename T1, int SZ, typename U,
938  class Sat = __ESIMD_NS::saturation_off_tag>
939 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
940 __ESIMD_API std::enable_if_t<
941  __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
942  std::is_floating_point<T1>::value &&
943  __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
944  std::is_floating_point<U>::value,
945  __ESIMD_NS::simd<T0, SZ>> dp4(__ESIMD_NS::simd<T1, SZ> src0, U src1,
946  Sat sat = {}) {
947  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
948 
949  __ESIMD_NS::simd<T1, SZ> Src1 = src1;
950  __ESIMD_NS::simd<float, SZ> Result;
951 #pragma unroll
952  for (int i = 0; i < SZ; i += 4) {
953  Result.select<4, 1>(i) = src0[i] * Src1[i] + src0[i + 1] * Src1[i + 1] +
954  src0[i + 2] * Src1[i + 2] +
955  src0[i + 3] * Src1[i + 3];
956  }
957  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
958  return Result;
959  else
960  return __ESIMD_NS::saturate<T1>(Result);
961 }
962 
973 template <typename T, typename U, int SZ,
974  class Sat = __ESIMD_NS::saturation_off_tag>
975 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
976 __ESIMD_API
977  std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
978  std::is_floating_point<T>::value &&
979  __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
980  std::is_floating_point<U>::value,
981  __ESIMD_NS::simd<T, SZ>> dph(__ESIMD_NS::simd<T, SZ> src0,
982  U src1, Sat sat = {}) {
983  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
984 
985  __ESIMD_NS::simd<float, SZ> Src1 = src1;
986  __ESIMD_NS::simd<float, SZ> Result;
987 #pragma unroll
988  for (int i = 0; i < SZ; i += 4) {
989  Result.select<4, 1>(i) = src0[i] * Src1[i] + src0[i + 1] * Src1[i + 1] +
990  src0[i + 2] * Src1[i + 2] + 1.0 * Src1[i + 3];
991  }
992  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
993  return Result;
994  else
995  return __ESIMD_NS::saturate<T>(Result);
996 }
997 
1008 template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
1009 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
1010 __ESIMD_API
1011  std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1012  std::is_floating_point<T>::value,
1013  __ESIMD_NS::simd<T, SZ>> line(__ESIMD_NS::simd<T, 4> src0,
1014  __ESIMD_NS::simd<T, SZ> src1,
1015  Sat sat = {}) {
1016  static_assert(SZ % 4 == 0, "result size is not a multiple of 4");
1017 
1019  __ESIMD_NS::simd<T, SZ> Result;
1020 #pragma unroll
1021  for (int i = 0; i < SZ; i += 4) {
1022  Result.select<4, 1>(i) = src0[0] * src1[i] + src0[3];
1023  }
1024 
1025  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1026  return Result;
1027  else
1028  return __ESIMD_NS::saturate<T>(Result);
1029 }
1030 
1042 template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
1043 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
1044 __ESIMD_API
1045  std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1046  std::is_floating_point<T>::value,
1047  __ESIMD_NS::simd<T, SZ>> line(float P, float Q,
1048  __ESIMD_NS::simd<T, SZ> src1,
1049  Sat sat = {}) {
1050  __ESIMD_NS::simd<T, 4> Src0 = P;
1051  Src0(3) = Q;
1052  return esimd::line<T>(Src0, src1, sat);
1053 }
1054 
1055 #endif
1056 
1063 template <typename T, int SZ>
1065  __ESIMD_NS::simd<float, SZ> Src0 = src0;
1066  return __esimd_frc(Src0.data());
1067 }
1068 
1074 template <typename T> __ESIMD_API T frc(T src0) {
1075  __ESIMD_NS::simd<T, 1> Src0 = src0;
1076  __ESIMD_NS::simd<T, 1> Result = esimd::frc<T>(Src0);
1077  return Result[0];
1078 }
1079 
1080 // lzd - leading zero detection
1081 template <typename RT, typename T0, int SZ,
1082  class Sat = __ESIMD_NS::saturation_off_tag>
1083 __ESIMD_API __ESIMD_NS::simd<RT, SZ> lzd(__ESIMD_NS::simd<T0, SZ> src0,
1084  Sat sat = {}) {
1085  // Saturation parameter ignored
1086  __ESIMD_NS::simd<__ESIMD_NS::uint, SZ> Src0 = src0;
1087  return __esimd_lzd<__ESIMD_NS::uint, SZ>(Src0.data());
1088 }
1089 
1090 template <typename RT, typename T0, class Sat = __ESIMD_NS::saturation_off_tag>
1091 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<RT>::value &&
1092  __ESIMD_DNS::is_esimd_scalar<T0>::value,
1093  std::remove_const_t<RT>>
1094 lzd(T0 src0, Sat sat = {}) {
1095  __ESIMD_NS::simd<T0, 1> Src0 = src0;
1096  __ESIMD_NS::simd<RT, 1> Result = esimd::lzd<RT>(Src0);
1097  return Result[0];
1098 }
1099 
1100 // lrp
1101 #if defined(ESIMD_GEN7_5) || defined(ESIMD_GEN8) || defined(ESIMD_GEN8_5) || \
1102  defined(ESIMD_GEN9) || defined(ESIMD_GEN9_5)
1103 
1104 template <int SZ, typename U, typename V,
1105  class Sat = __ESIMD_NS::saturation_off_tag>
1106 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
1107 __ESIMD_API __ESIMD_NS::simd<float, SZ> lrp(__ESIMD_NS::simd<float, SZ> src0,
1108  U src1, V src2, Sat sat = {}) {
1109  static_assert(SZ >= 4 && (SZ & 0x3) == 0,
1110  "vector size must be a multiple of 4");
1111  __ESIMD_NS::simd<float, SZ> Src1 = src1;
1112  __ESIMD_NS::simd<float, SZ> Src2 = src2;
1113  __ESIMD_NS::simd<float, SZ> Result =
1114  __esimd_lrp<SZ>(src0.data(), Src1.data(), Src2.data());
1115 
1116  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1117  return Result;
1118  else
1119  return __ESIMD_NS::saturate<float>(Result);
1120 }
1121 
1122 #else
1123 
1124 // The old implementation is to generate vISA IRs for lrp.
1125 // Now We change to use direct mul/add, and hope to generate mad instructions
1126 // at the end, to still get the performance as good as HW solution.
1127 // The only input and return types for these APIs are floats.
1128 // In order to be able to use the old emu code, we keep the template argument
1129 // for the type, although the type "T" can only be float.
1130 // We use std::enable_if to force the float type only.
1131 // If the gen is not specified we warn the programmer that they are potentially
1132 // using less efficient implementation.
1133 template <typename T, int SZ, typename U, typename V,
1134  class Sat = __ESIMD_NS::saturation_off_tag>
1135 __SYCL_DEPRECATED("Gen9 specific: use emulation sequence")
1136 __ESIMD_API
1137  std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1138  std::is_floating_point<T>::value &&
1139  __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
1140  std::is_floating_point<U>::value,
1141  __ESIMD_NS::simd<T, SZ>> lrp(__ESIMD_NS::simd<T, SZ> src0,
1142  U src1, V src2,
1143  Sat sat = {}) {
1144 
1145  __ESIMD_NS::simd<float, SZ> Src1 = src1;
1146  __ESIMD_NS::simd<float, SZ> Src2 = src2;
1147  __ESIMD_NS::simd<float, SZ> Result;
1148  Result = Src1 * src0 + Src2 * (1.0f - src0);
1149  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1150  return Result;
1151  else
1152  return __ESIMD_NS::saturate<T>(Result);
1153 }
1154 #endif
1155 
1157 
1160 
1162 template <typename T0, typename T1, int SZ>
1163 __ESIMD_API __ESIMD_NS::simd<T0, SZ> bf_reverse(__ESIMD_NS::simd<T1, SZ> src0) {
1164  __ESIMD_NS::simd<unsigned, SZ> Src0 = src0;
1165  return __esimd_bfrev<unsigned>(Src0.data());
1166 }
1167 
1169 template <typename T0, typename T1>
1170 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1171  __ESIMD_DNS::is_esimd_scalar<T1>::value,
1172  std::remove_const_t<T0>>
1174  __ESIMD_NS::simd<T1, 1> Src0 = src0;
1175  __ESIMD_NS::simd<T0, 1> Result = esimd::bf_reverse<T0>(Src0);
1176  return Result[0];
1177 }
1178 
1180 template <typename T0, typename T1, int SZ, typename U, typename V, typename W>
1181 __ESIMD_API
1182  std::enable_if_t<std::is_integral<T1>::value, __ESIMD_NS::simd<T0, SZ>>
1183  bf_insert(U src0, V src1, W src2, __ESIMD_NS::simd<T1, SZ> src3) {
1184  typedef typename __ESIMD_DNS::dword_type<T1> DT1;
1185  static_assert(std::is_integral<DT1>::value && sizeof(DT1) == sizeof(int),
1186  "operand conversion failed");
1187  __ESIMD_NS::simd<DT1, SZ> Src0 = src0;
1188  __ESIMD_NS::simd<DT1, SZ> Src1 = src1;
1189  __ESIMD_NS::simd<DT1, SZ> Src2 = src2;
1190  __ESIMD_NS::simd<DT1, SZ> Src3 = src3;
1191 
1192  return __esimd_bfi<DT1>(Src0.data(), Src1.data(), Src2.data(), Src3.data());
1193 }
1194 
1196 template <typename T0, typename T1, typename T2, typename T3, typename T4>
1197 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1198  __ESIMD_DNS::is_esimd_scalar<T4>::value,
1199  std::remove_const_t<T0>>
1200 bf_insert(T1 src0, T2 src1, T3 src2, T4 src3) {
1201  __ESIMD_NS::simd<T4, 1> Src3 = src3;
1202  __ESIMD_NS::simd<T0, 1> Result = esimd::bf_insert<T0>(src0, src1, src2, Src3);
1203  return Result[0];
1204 }
1205 
1207 template <typename T0, typename T1, int SZ, typename U, typename V>
1208 __ESIMD_API
1209  std::enable_if_t<std::is_integral<T1>::value, __ESIMD_NS::simd<T0, SZ>>
1210  bf_extract(U src0, V src1, __ESIMD_NS::simd<T1, SZ> src2) {
1211  typedef typename __ESIMD_DNS::dword_type<T1> DT1;
1212  static_assert(std::is_integral<DT1>::value && sizeof(DT1) == sizeof(int),
1213  "operand conversion failed");
1214  __ESIMD_NS::simd<DT1, SZ> Src0 = src0;
1215  __ESIMD_NS::simd<DT1, SZ> Src1 = src1;
1216  __ESIMD_NS::simd<DT1, SZ> Src2 = src2;
1217 
1218  return __esimd_sbfe<DT1>(Src0.data(), Src1.data(), Src2.data());
1219 }
1220 
1222 template <typename T0, typename T1, typename T2, typename T3>
1223 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1224  __ESIMD_DNS::is_esimd_scalar<T3>::value,
1225  std::remove_const_t<T0>>
1226 bf_extract(T1 src0, T2 src1, T3 src2) {
1227  __ESIMD_NS::simd<T3, 1> Src2 = src2;
1228  __ESIMD_NS::simd<T0, 1> Result = esimd::bf_extract<T0>(src0, src1, Src2);
1229  return Result[0];
1230 }
1231 
1233 
1236 
1237 // sincos
1238 template <int SZ, typename U, class Sat = __ESIMD_NS::saturation_off_tag>
1239 __ESIMD_API __ESIMD_NS::simd<float, SZ>
1240 sincos(__ESIMD_NS::simd<float, SZ> &dstcos, U src0, Sat sat = {}) {
1241  dstcos = __ESIMD_NS::cos(src0, sat);
1242  return __ESIMD_NS::sin(src0, sat);
1243 }
1244 
1245 // atan
1246 
1248 namespace detail {
1249 constexpr double __ESIMD_CONST_PI = 3.1415926535897932384626433832795;
1250 } // namespace detail
1252 
1253 template <typename T, int SZ>
1255  static_assert(std::is_floating_point<T>::value,
1256  "Floating point argument type is expected.");
1258 
1259  __ESIMD_NS::simd<T, SZ> OneP((T)1.0);
1260  __ESIMD_NS::simd<T, SZ> OneN((T)-1.0);
1262  __ESIMD_NS::simd_mask<SZ> Gt1 = Src0 > T(1.0);
1263 
1264  sign.merge(OneN, OneP, src0 < 0);
1265 
1266  Src0.merge(__ESIMD_NS::inv(Src0), Gt1);
1267 
1268  __ESIMD_NS::simd<T, SZ> Src0P2 = Src0 * Src0;
1269  __ESIMD_NS::simd<T, SZ> Src0P4 = Src0P2 * Src0P2;
1270 
1271  __ESIMD_NS::simd<T, SZ> Result =
1272  (Src0P4 * T(0.185696) + ((Src0 * T(0.787997) + T(0.63693)) * Src0P2) +
1273  Src0) /
1274  (((((Src0 * -T(0.000121387) + T(0.00202308)) * Src0P2) +
1275  (Src0 * -T(0.0149145)) + T(0.182569)) *
1276  Src0P4) +
1277  ((Src0 * T(0.395889) + T(1.12158)) * Src0P2) + (Src0 * T(0.636918)) +
1278  T(1.0));
1279 
1280  Result.merge(Result - T(detail::__ESIMD_CONST_PI) / T(2.0), Gt1);
1281 
1282  return __ESIMD_NS::abs(Result) * sign;
1283 }
1284 
1285 template <typename T> __ESIMD_API T atan(T src0) {
1286  static_assert(std::is_floating_point<T>::value,
1287  "Floating point argument type is expected.");
1288  __ESIMD_NS::simd<T, 1> Src0 = src0;
1289  __ESIMD_NS::simd<T, 1> Result = esimd::atan(Src0);
1290  return Result[0];
1291 }
1292 
1293 // acos
1294 
1295 template <typename T, int SZ>
1296 __ESIMD_API
1297  std::enable_if_t<std::is_floating_point<T>::value, __ESIMD_NS::simd<T, SZ>>
1300 
1301  __ESIMD_NS::simd_mask<SZ> Neg = src0 < T(0.0);
1302  __ESIMD_NS::simd_mask<SZ> TooBig = Src0 >= T(0.999998);
1303 
1304  // Replace oversized values to ensure no possibility of sqrt of
1305  // a negative value later
1306  Src0.merge(T(0.0), TooBig);
1307 
1308  __ESIMD_NS::simd<T, SZ> Src01m = T(1.0) - Src0;
1309 
1310  __ESIMD_NS::simd<T, SZ> Src0P2 = Src01m * Src01m;
1311  __ESIMD_NS::simd<T, SZ> Src0P4 = Src0P2 * Src0P2;
1312 
1313  __ESIMD_NS::simd<T, SZ> Result =
1314  (((Src01m * T(0.015098965761299077) - T(0.005516443930088506)) * Src0P4) +
1315  ((Src01m * T(0.047654245891495528) + T(0.163910606547823220)) * Src0P2) +
1316  Src01m * T(2.000291665285952400) - T(0.000007239283986332)) *
1317  __ESIMD_NS::rsqrt(Src01m * T(2.0));
1318 
1319  Result.merge(T(0.0), TooBig);
1320  Result.merge(T(detail::__ESIMD_CONST_PI) - Result, Neg);
1321  return Result;
1322 }
1323 
1324 template <typename T>
1325 __ESIMD_API std::enable_if_t<std::is_floating_point<T>::value, T> acos(T src0) {
1326  __ESIMD_NS::simd<T, 1> Src0 = src0;
1327  __ESIMD_NS::simd<T, 1> Result = esimd::acos(Src0);
1328  return Result[0];
1329 }
1330 
1331 // asin
1332 
1333 template <typename T, int SZ>
1334 __ESIMD_API
1335  std::enable_if_t<std::is_floating_point<T>::value, __ESIMD_NS::simd<T, SZ>>
1337  __ESIMD_NS::simd_mask<SZ> Neg = src0 < T(0.0);
1338 
1339  __ESIMD_NS::simd<T, SZ> Result =
1340  T(detail::__ESIMD_CONST_PI / 2.0) - esimd::acos(__ESIMD_NS::abs(src0));
1341 
1342  Result.merge(-Result, Neg);
1343  return Result;
1344 }
1345 
1346 template <typename T>
1347 __ESIMD_API std::enable_if_t<std::is_floating_point<T>::value, T> asin(T src0) {
1348  __ESIMD_NS::simd<T, 1> Src0 = src0;
1349  __ESIMD_NS::simd<T, 1> Result = esimd::asin(Src0);
1350  return Result[0];
1351 }
1353 
1356 
1357 /* atan2_fast - a fast atan2 implementation */
1358 /* vector input */
1359 template <int N>
1360 __ESIMD_NS::simd<float, N> atan2_fast(__ESIMD_NS::simd<float, N> y,
1361  __ESIMD_NS::simd<float, N> x);
1362 /* scalar input */
1363 template <typename T> float atan2_fast(T y, T x);
1364 
1365 /* atan2 - atan2 implementation */
1366 /* For Vector input */
1367 template <int N>
1368 __ESIMD_NS::simd<float, N> atan2(__ESIMD_NS::simd<float, N> y,
1369  __ESIMD_NS::simd<float, N> x);
1370 /* scalar Input */
1371 template <typename T> float atan2(T y, T x);
1372 
1373 /* fmod: */
1374 /* vector input */
1375 template <int N>
1376 __ESIMD_NS::simd<float, N> fmod(__ESIMD_NS::simd<float, N> y,
1377  __ESIMD_NS::simd<float, N> x);
1378 /* scalar Input */
1379 template <typename T> float fmod(T y, T x);
1380 
1381 /* sin_emu - EU emulation for sin(x) */
1382 /* For Vector input */
1383 template <int N>
1384 __ESIMD_NS::simd<float, N> sin_emu(__ESIMD_NS::simd<float, N> x);
1385 /* scalar Input */
1386 template <typename T> float sin_emu(T x);
1387 
1388 /* cos_emu - EU emulation for cos(x) */
1389 /* For Vector input */
1390 template <int N>
1391 __ESIMD_NS::simd<float, N> cos_emu(__ESIMD_NS::simd<float, N> x);
1392 
1393 /* scalar Input */
1394 template <typename T> float cos_emu(T x);
1395 
1396 /* tanh_cody_waite - Cody-Waite implementation for tanh(x) */
1397 /* float input */
1398 float tanh_cody_waite(float x);
1399 /* vector input */
1400 template <int N>
1401 __ESIMD_NS::simd<float, N> tanh_cody_waite(__ESIMD_NS::simd<float, N> x);
1402 /* tanh - opencl like implementation for tanh(x) */
1403 /* float input */
1404 float tanh(float x);
1405 /* vector input */
1406 template <int N> __ESIMD_NS::simd<float, N> tanh(__ESIMD_NS::simd<float, N> x);
1407 
1408 /* ------------------------- Extended Math Routines
1409  * -------------------------------------------------*/
1410 
1411 // For vector input
1412 template <int N>
1413 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1414 atan2_fast(__ESIMD_NS::simd<float, N> y, __ESIMD_NS::simd<float, N> x) {
1415  /* smallest such that 1.0+CONST_DBL_EPSILON != 1.0 */
1416  constexpr float CONST_DBL_EPSILON = 0.00001f;
1417  __ESIMD_NS::simd<float, N> OneP(1.0f);
1418  __ESIMD_NS::simd<float, N> OneN(-1.0f);
1419  __ESIMD_NS::simd<float, N> sign;
1420  __ESIMD_NS::simd<float, N> atan2;
1421  __ESIMD_NS::simd<float, N> r;
1422  __ESIMD_NS::simd_mask<N> mask = x < 0;
1423  __ESIMD_NS::simd<float, N> abs_y = __ESIMD_NS::abs(y) + CONST_DBL_EPSILON;
1424 
1425  r.merge((x + abs_y) / (abs_y - x), (x - abs_y) / (x + abs_y), mask);
1426  atan2.merge(float(detail::__ESIMD_CONST_PI) * 0.75f,
1427  float(detail::__ESIMD_CONST_PI) * 0.25f, mask);
1428  atan2 += (0.1963f * r * r - 0.9817f) * r;
1429 
1430  sign.merge(OneN, OneP, y < 0);
1431 
1432  return atan2 * sign;
1433 }
1434 
1435 // For Scalar Input
1436 template <> ESIMD_INLINE float atan2_fast(float y, float x) {
1437  __ESIMD_NS::simd<float, 1> vy = y;
1438  __ESIMD_NS::simd<float, 1> vx = x;
1439  __ESIMD_NS::simd<float, 1> atan2 = esimd::atan2_fast(vy, vx);
1440  return atan2[0];
1441 }
1442 
1443 // atan2
1444 // For Vector input
1445 template <int N>
1446 ESIMD_INLINE __ESIMD_NS::simd<float, N> atan2(__ESIMD_NS::simd<float, N> y,
1447  __ESIMD_NS::simd<float, N> x) {
1448  __ESIMD_NS::simd<float, N> atan2;
1449  __ESIMD_NS::simd_mask<N> mask;
1450  __ESIMD_NS::simd<float, N> atan = esimd::atan(y / x);
1451 
1452  constexpr float CONST_DBL_EPSILON = 0.00001f;
1453 
1454  mask = (__ESIMD_NS::abs(x) < CONST_DBL_EPSILON && y < -CONST_DBL_EPSILON);
1455  atan2.merge(float(-detail::__ESIMD_CONST_PI) / 2.f, 0.f, mask);
1456  mask = (__ESIMD_NS::abs(x) < CONST_DBL_EPSILON && y > CONST_DBL_EPSILON);
1457  atan2.merge(float(detail::__ESIMD_CONST_PI) / 2.f, mask);
1458  mask = (x < -CONST_DBL_EPSILON && y < -CONST_DBL_EPSILON);
1459  atan2.merge(atan - float(detail::__ESIMD_CONST_PI), mask);
1460  mask = (x < -CONST_DBL_EPSILON && y >= -CONST_DBL_EPSILON);
1461  atan2.merge(atan + float(detail::__ESIMD_CONST_PI), mask);
1462  mask = (x > CONST_DBL_EPSILON);
1463  atan2.merge(atan, mask);
1464 
1465  return atan2;
1466 }
1467 
1468 // For Scalar Input
1469 template <> ESIMD_INLINE float atan2(float y, float x) {
1470  __ESIMD_NS::simd<float, 1> vy = y;
1471  __ESIMD_NS::simd<float, 1> vx = x;
1472  __ESIMD_NS::simd<float, 1> atan2 = esimd::atan2(vy, vx);
1473  return atan2[0];
1474 }
1475 
1476 // fmod:
1477 // For Vector input
1478 template <int N>
1479 ESIMD_INLINE __ESIMD_NS::simd<float, N> fmod(__ESIMD_NS::simd<float, N> y,
1480  __ESIMD_NS::simd<float, N> x) {
1481  __ESIMD_NS::simd<float, N> abs_x = __ESIMD_NS::abs(x);
1482  __ESIMD_NS::simd<float, N> abs_y = __ESIMD_NS::abs(y);
1483 
1484  auto fmod_sign_mask = (y.template bit_cast_view<int32_t>()) & 0x80000000;
1485 
1486  __ESIMD_NS::simd<float, N> reminder =
1487  abs_y - abs_x * __ESIMD_NS::trunc<float>(abs_y / abs_x);
1488 
1489  abs_x.merge(0.0f, reminder >= 0);
1490  __ESIMD_NS::simd<float, N> fmod = reminder + abs_x;
1491  __ESIMD_NS::simd<float, N> fmod_abs = __ESIMD_NS::abs(fmod);
1492 
1493  auto fmod_bits =
1494  (fmod_abs.template bit_cast_view<int32_t>()) | fmod_sign_mask;
1495  return fmod_bits.template bit_cast_view<float>();
1496 }
1497 
1498 // For Scalar Input
1499 template <> ESIMD_INLINE float fmod(float y, float x) {
1500  return fmod(__ESIMD_NS::simd<float, 1>(y), __ESIMD_NS::simd<float, 1>(x))[0];
1501 }
1502 
1503 // sin_emu - EU emulation for sin(x)
1504 // For Vector input
1505 template <int N>
1506 ESIMD_INLINE __ESIMD_NS::simd<float, N> sin_emu(__ESIMD_NS::simd<float, N> x) {
1507  __ESIMD_NS::simd<float, N> x1;
1508  __ESIMD_NS::simd<float, N> x2;
1509  __ESIMD_NS::simd<float, N> t3;
1510 
1511  __ESIMD_NS::simd<float, N> sign;
1512  __ESIMD_NS::simd<float, N> fTrig;
1513  __ESIMD_NS::simd<float, N> TwoPI(float(detail::__ESIMD_CONST_PI) * 2.0f);
1514  __ESIMD_NS::simd<float, N> CmpI((float)detail::__ESIMD_CONST_PI);
1515  __ESIMD_NS::simd<float, N> OneP(1.0f);
1516  __ESIMD_NS::simd<float, N> OneN(-1.0f);
1517 
1518  x = esimd::fmod(x, TwoPI);
1519  x.merge(TwoPI + x, x < 0);
1520 
1521  x1.merge(CmpI - x, x - CmpI, (x <= float(detail::__ESIMD_CONST_PI)));
1522  x1.merge(x, (x <= float(detail::__ESIMD_CONST_PI) * 0.5f));
1523  x1.merge(TwoPI - x, (x > float(detail::__ESIMD_CONST_PI) * 1.5f));
1524 
1525  sign.merge(OneN, OneP, (x > float(detail::__ESIMD_CONST_PI)));
1526 
1527  x2 = x1 * x1;
1528  t3 = x2 * x1 * 0.1666667f;
1529 
1530  fTrig =
1531  x1 + t3 * (OneN + x2 * 0.05f *
1532  (OneP + x2 * 0.0238095f *
1533  (OneN + x2 * 0.0138889f *
1534  (OneP - x2 * 0.0090909f))));
1535  fTrig *= sign;
1536  return fTrig;
1537 }
1538 
1539 // scalar Input
1540 template <> ESIMD_INLINE float sin_emu(float x0) {
1541  return esimd::sin_emu(__ESIMD_NS::simd<float, 1>(x0))[0];
1542 }
1543 
1544 // cos_emu - EU emulation for sin(x)
1545 // For Vector input
1546 template <int N>
1547 ESIMD_INLINE __ESIMD_NS::simd<float, N> cos_emu(__ESIMD_NS::simd<float, N> x) {
1548  return esimd::sin_emu(0.5f * float(detail::__ESIMD_CONST_PI) - x);
1549 }
1550 
1551 // scalar Input
1552 template <> ESIMD_INLINE float cos_emu(float x0) {
1553  return esimd::cos_emu(__ESIMD_NS::simd<float, 1>(x0))[0];
1554 }
1555 
1557 namespace detail {
1558 
1559 template <int N>
1560 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1561 tanh_cody_waite_impl(__ESIMD_NS::simd<float, N> x) {
1562  /*
1563  * 0 x_small x_medium x_large
1564  * | x | rational polynomial | 1 - 2/(1 + exp(2*x)) | 1
1565  *
1566  * rational polynomial for single precision = x + x * (g * (p[1] * g + p[0]) /
1567  * (g + q[0]) g = x^2 p0 = -0.82377 28127 E+00 p1 = -0.38310 10665 E-02 q0 =
1568  * 0.24713 19654 E+01 q1 = 1.00000 00000 E+00
1569  *
1570  */
1571 
1572  constexpr float p0 = -0.8237728127E+00f;
1573  constexpr float p1 = -0.3831010665E-02f;
1574  constexpr float q0 = 0.2471319654E+01f;
1575  constexpr float q1 = 1.0000000000E+00f;
1576  constexpr float xsmall = 4.22863966691620432990E-04f;
1577  constexpr float xmedium = 0.54930614433405484570f;
1578  constexpr float xlarge = 8.66433975699931636772f;
1579 
1580  using RT = __ESIMD_NS::simd<float, N>;
1581 
1582  RT absX = __ESIMD_NS::abs(x);
1583  RT g = absX * absX;
1584 
1585  RT sign;
1586  sign.merge(-1.f, 1.f, x < 0.f);
1587 
1588  auto isLarge = absX > xlarge;
1589  auto minor = absX <= xlarge;
1590  auto isGtMed = minor & (absX > xmedium);
1591  auto isGtSmall = (absX > xsmall) & (absX <= xmedium);
1592 
1593  RT res;
1594  res.merge(sign, x, isLarge);
1595  auto temp = __ESIMD_NS::exp(absX * 2.0f) + 1.f;
1596  temp = ((temp - 2.f) / temp) * sign;
1597  res.merge(temp, isGtMed);
1598  res.merge((absX + absX * g * (g * p1 + p0) / (g + q0)) * sign, isGtSmall);
1599 
1600  return res;
1601 }
1602 
1603 template <int N>
1604 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1605 tanh_impl(__ESIMD_NS::simd<float, N> x) {
1606  /*
1607  * 0 x_small x_large
1608  * | x | ( exp(x) - exp(-x) ) / ( exp(x) + exp(-x) ) | 1
1609  *
1610  */
1611 
1612  constexpr float xsmall = 0.000045f; // same as exp(-10.0f)
1613  constexpr float xlarge = 40.f;
1614 
1615  using RT = __ESIMD_NS::simd<float, N>;
1616 
1617  RT absX = __ESIMD_NS::abs(x);
1618 
1619  RT sign;
1620  sign.merge(-1.f, 1.f, x < 0.f);
1621 
1622  auto isLarge = (absX > xlarge);
1623  auto isLessE = (absX <= xlarge);
1624 
1625  RT res;
1626  res.merge(sign, x, isLarge);
1627 
1628  RT exp;
1629  exp = __ESIMD_NS::exp(absX * 2.f);
1630 
1631  res.merge(((exp - 1.f) / (exp + 1.f)) * sign, (absX > xsmall) & isLessE);
1632 
1633  return res;
1634 }
1635 } // namespace detail
1637 
1638 /* tanh_cody_waite - Cody-Waite implementation for tanh(x) */
1639 /* float input */
1640 ESIMD_INLINE float tanh_cody_waite(float x) {
1641  return detail::tanh_cody_waite_impl(__ESIMD_NS::simd<float, 1>(x))[0];
1642 }
1643 /* vector input */
1644 template <int N>
1645 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1646 tanh_cody_waite(__ESIMD_NS::simd<float, N> x) {
1647  return detail::tanh_cody_waite_impl(x);
1648 }
1649 
1650 /* tanh - opencl like implementation for tanh(x) */
1651 /* float input */
1652 ESIMD_INLINE float tanh(float x) {
1653  return esimd::detail::tanh_impl(__ESIMD_NS::simd<float, 1>(x))[0];
1654 }
1655 /* vector input */
1656 template <int N>
1657 ESIMD_INLINE __ESIMD_NS::simd<float, N> tanh(__ESIMD_NS::simd<float, N> x) {
1658  return esimd::detail::tanh_impl(x);
1659 }
1660 
1661 template <typename T, int N>
1662 __ESIMD_NS::simd<T, N> dp4(__ESIMD_NS::simd<T, N> v1,
1663  __ESIMD_NS::simd<T, N> v2) {
1664  auto retv = __esimd_dp4<T, N>(v1.data(), v2.data());
1665  return retv;
1666 }
1667 
1675 template <int N>
1676 ESIMD_INLINE __ESIMD_NS::simd<sycl::half, N>
1677 srnd(__ESIMD_NS::simd<float, N> src0, __ESIMD_NS::simd<uint16_t, N> src1) {
1678  return __esimd_srnd<N>(src0.data(), src1.data());
1679 }
1680 
1682 
1685 
1690 using bfn_t __SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::bfn_t") =
1692 
1701 template <bfn_t FuncControl, typename T, int N>
1703  "Please use sycl::ext::intel::esimd::bfn<FuncControl>(src0, src1, src2);")
1704 __ESIMD_API std::enable_if_t<std::is_integral_v<T>, __ESIMD_NS::simd<T, N>> bfn(
1705  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
1706  __ESIMD_NS::simd<T, N> src2) {
1707  return __ESIMD_NS::bfn<FuncControl>(src0, src1, src2);
1708 }
1709 
1717 template <bfn_t FuncControl, typename T>
1719  "Please use sycl::ext::intel::esimd::bfn<FuncControl>(src0, src1, src2);")
1720 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T>::value &&
1721  std::is_integral_v<T>,
1722  T> bfn(T src0, T src1, T src2) {
1723  return __ESIMD_NS::bfn<FuncControl>(src0, src1, src2);
1724 }
1725 
1728 __SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::rdtsc();")
1729 ESIMD_INLINE uint64_t rdtsc() { return __ESIMD_NS::rdtsc(); }
1730 
1738 template <typename T, int N>
1739 ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
1740  __ESIMD_NS::simd<T, N> b,
1741  __ESIMD_NS::simd<T, N> c) {
1742  static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
1743  "fma only supports floating point types");
1744  using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
1745  auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
1746  __ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
1747  __ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
1748  __ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));
1749  return __ESIMD_DNS::convert_vector<T, CppT, N>(Ret);
1750 }
1751 
1753 
1754 } // namespace ext::intel::experimental::esimd
1755 } // namespace _V1
1756 } // namespace sycl
Definition: simd.hpp:1387
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > lsr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Logical Shift Right (vector version)
Definition: math.hpp:127
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&sycl::ext::intel::esimd::detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), sycl::ext::intel::esimd::simd< T0, SZ > > ror(sycl::ext::intel::esimd::simd< T1, SZ > src0, sycl::ext::intel::esimd::simd< T1, SZ > src1)
Rotate right operation with two vector inputs.
Definition: math.hpp:353
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&sycl::ext::intel::esimd::detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), sycl::ext::intel::esimd::simd< T0, SZ > > rol(sycl::ext::intel::esimd::simd< T1, SZ > src0, sycl::ext::intel::esimd::simd< T1, SZ > src1)
Rotate left operation with two vector inputs.
Definition: math.hpp:287
__ESIMD_API std::enable_if_t< std::is_integral< T1 >::value, sycl::ext::intel::esimd::simd< T0, SZ > > bf_insert(U src0, V src1, W src2, sycl::ext::intel::esimd::simd< T1, SZ > src3)
bf_insert
Definition: math.hpp:1183
__ESIMD_API std::enable_if_t< std::is_integral< T1 >::value, sycl::ext::intel::esimd::simd< T0, SZ > > bf_extract(U src0, V src1, sycl::ext::intel::esimd::simd< T1, SZ > src2)
bf_extract
Definition: math.hpp:1210
__ESIMD_API sycl::ext::intel::esimd::simd< T0, SZ > bf_reverse(sycl::ext::intel::esimd::simd< T1, SZ > src0)
bf_reverse
Definition: math.hpp:1163
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > asr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Arithmetical Shift Right (vector version)
Definition: math.hpp:184
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > shl(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Shift left operation (vector version)
Definition: math.hpp:43
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > shr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Shift right operation (vector version)
Definition: math.hpp:239
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > fma(sycl::ext::intel::esimd::simd< T, N > a, sycl::ext::intel::esimd::simd< T, N > b, sycl::ext::intel::esimd::simd< T, N > c)
Performs a fused multiply add computation with three vector operands.
Definition: math.hpp:1739
ESIMD_INLINE uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
Definition: math.hpp:1729
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
Definition: math.hpp:1086
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< __ESIMD_DNS::is_esimd_scalar< T >::value &&std::is_integral_v< T >, T > bfn(T src0, T src1, T src2)
Performs binary function computation with three scalar operands.
Definition: math.hpp:1168
__ESIMD_API T cos(T src, Sat sat={})
Scalar version.
Definition: math.hpp:405
__ESIMD_API T sin(T src, Sat sat={})
Scalar version.
Definition: math.hpp:401
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > imul_impl(sycl::ext::intel::esimd::simd< T, N > &rmd, sycl::ext::intel::esimd::simd< T0, N > src0, sycl::ext::intel::esimd::simd< T1, N > src1)
Computes the 64-bit result of two 32-bit element vectors src0 and src1 multiplication.
Definition: math.hpp:415
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp4(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
Definition: math.hpp:945
__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1)
Performs add with carry of a unsigned 32-bit scalars.
Definition: math.hpp:1231
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > acos(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1298
ESIMD_INLINE sycl::ext::intel::esimd::simd< sycl::half, N > srnd(sycl::ext::intel::esimd::simd< float, N > src0, sycl::ext::intel::esimd::simd< uint16_t, N > src1)
srnd - perform stochastic rounding.
Definition: math.hpp:1677
__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1)
Performs substraction with borrow of 2 unsigned 32-bit scalars.
Definition: math.hpp:1296
__ESIMD_API SZ simd< T, SZ > Sat sat
Definition: math.hpp:180
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp3(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
Definition: math.hpp:910
sycl::ext::intel::esimd::simd< float, N > cos_emu(sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1547
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > lzd(sycl::ext::intel::esimd::simd< T0, SZ > src0, Sat sat={})
Definition: math.hpp:1083
sycl::ext::intel::esimd::simd< float, N > sin_emu(sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1506
__SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::addc(carry, src0, src1);") __ESIMD_API sycl
Definition: math.hpp:500
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > div(sycl::ext::intel::esimd::simd< T, SZ > &remainder, sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Integral division with a vector dividend and a scalar divisor.
Definition: math.hpp:642
__ESIMD_API sycl::ext::intel::esimd::simd< float, SZ > sincos(sycl::ext::intel::esimd::simd< float, SZ > &dstcos, U src0, Sat sat={})
Definition: math.hpp:1240
__ESIMD_API SZ simd< T, SZ > Sat int SZ
Definition: math.hpp:211
ESIMD_DETAIL __ESIMD_API sycl::ext::intel::esimd::simd< T, SZ > atan(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1254
__ESIMD_API simd< T, SZ >(min)(simd< T
Selects component-wise the minimum of the two vectors.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > quot(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Integral quotient (vector version)
Definition: math.hpp:576
sycl::ext::intel::esimd::simd< float, N > fmod(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1479
sycl::ext::intel::esimd::simd< float, N > atan2_fast(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1414
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > asin(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1336
ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat={})
Definition: math.hpp:497
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > dph(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
Definition: math.hpp:981
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > lrp(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1, V src2, Sat sat={})
Definition: math.hpp:1141
__ESIMD_API sycl::ext::intel::esimd::simd< T, SZ > frc(sycl::ext::intel::esimd::simd< T, SZ > src0)
Performs component-wise truncate-to-minus-infinity fraction operation of src0.
Definition: math.hpp:1064
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
Definition: math.hpp:211
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp2(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
Definition: math.hpp:876
__ESIMD_API std::enable_if_t< detail::is_esimd_scalar< T1 >::value, std::remove_const_t< T1 > > abs(T1 src0)
Get absolute value (scalar version).
Definition: math.hpp:166
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > imul(sycl::ext::intel::esimd::simd< T, N > &rmd, sycl::ext::intel::esimd::simd< T0, N > src0, sycl::ext::intel::esimd::simd< T1, N > src1)
Computes the 64-bit multiply result of two 32-bit integer vectors src0 and src1.
Definition: math.hpp:448
sycl::ext::intel::esimd::simd< float, N > atan2(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1446
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > line(sycl::ext::intel::esimd::simd< T, 4 > src0, sycl::ext::intel::esimd::simd< T, SZ > src1, Sat sat={})
Linear equation.
Definition: math.hpp:1013
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > mod(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Modulo (vector version)
Definition: math.hpp:608
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
std::enable_if_t< std::is_same_v< Tp, sycl::half2 >, sycl::half2 > inv(Tp x)
Definition: math.hpp:152
std::enable_if_t< std::is_same_v< Tp, sycl::half2 >, sycl::half2 > rsqrt(Tp x)
Definition: math.hpp:215
auto autodecltype(a) b
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > exp(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
autodecltype(x) x
Definition: access.hpp:18