DPC++ Runtime
Runtime libraries for oneAPI DPC++
integer_functions.cpp
Go to the documentation of this file.
1 //==------------------- integer_functions.cpp ------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
11 #include "host_helper_macros.hpp"
12 
13 namespace {
14 // A helper function for mul_hi built-in for long
15 template <typename T> inline T __get_high_half(T a0b0, T a0b1, T a1b0, T a1b1) {
16  constexpr int halfsize = (sizeof(T) * 8) / 2;
17  // To get the upper 64 bits:
18  // 64 bits from a1b1, upper 32 bits from [a1b0 + (a0b1 + a0b0>>32 (carry bit
19  // in 33rd bit))] with carry bit on 64th bit - use of hadd. Add the a1b1 to
20  // the above 32 bit result.
21  return a1b1 +
22  (sycl::hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1));
23 }
24 
25 // A helper function for mul_hi built-in for long
26 template <typename T>
27 inline void __get_half_products(T a, T b, T &a0b0, T &a0b1, T &a1b0, T &a1b1) {
28  constexpr sycl::cl_int halfsize = (sizeof(T) * 8) / 2;
29  T a1 = a >> halfsize;
30  T a0 = (a << halfsize) >> halfsize;
31  T b1 = b >> halfsize;
32  T b0 = (b << halfsize) >> halfsize;
33 
34  // a1b1 - for bits - [64-128)
35  // a1b0 a0b1 for bits - [32-96)
36  // a0b0 for bits - [0-64)
37  a1b1 = a1 * b1;
38  a0b1 = a0 * b1;
39  a1b0 = a1 * b0;
40  a0b0 = a0 * b0;
41 }
42 
43 // T is minimum of 64 bits- long or longlong
44 template <typename T> inline T __u_long_mul_hi(T a, T b) {
45  T a0b0, a0b1, a1b0, a1b1;
46  __get_half_products(a, b, a0b0, a0b1, a1b0, a1b1);
47  T result = __get_high_half(a0b0, a0b1, a1b0, a1b1);
48  return result;
49 }
50 
51 template <typename T> inline T __s_long_mul_hi(T a, T b) {
52  using UT = std::make_unsigned_t<T>;
53  UT absA = std::abs(a);
54  UT absB = std::abs(b);
55 
56  UT a0b0, a0b1, a1b0, a1b1;
57  __get_half_products(absA, absB, a0b0, a0b1, a1b0, a1b1);
58  T result = __get_high_half(a0b0, a0b1, a1b0, a1b1);
59 
60  bool isResultNegative = (a < 0) != (b < 0);
61  if (isResultNegative) {
62  result = ~result;
63 
64  // Find the low half to see if we need to carry
65  constexpr int halfsize = (sizeof(T) * 8) / 2;
66  UT low = a0b0 + ((a0b1 + a1b0) << halfsize);
67  if (low == 0)
68  ++result;
69  }
70 
71  return result;
72 }
73 } // namespace
74 
75 namespace sycl {
76 inline namespace _V1 {
77 #define BUILTIN_GENINT(NUM_ARGS, NAME, IMPL) \
78  HOST_IMPL(NAME, IMPL) \
79  FOR_EACH2(EXPORT_SCALAR, NUM_ARGS, NAME, INTEGER_TYPES) \
80  EXPORT_VEC_1_16(NUM_ARGS, NAME, FIXED_WIDTH_INTEGER_TYPES)
81 #define BUILTIN_GENINT_SU(NUM_ARGS, NAME, IMPL) \
82  BUILTIN_GENINT(NUM_ARGS, NAME, IMPL)
83 
84 BUILTIN_GENINT(ONE_ARG, abs, [](auto x) -> decltype(x) {
85  if constexpr (std::is_signed_v<decltype(x)>) {
86  return std::abs(x);
87  } else {
88  return x;
89  }
90 })
91 
92 BUILTIN_GENINT_SU(TWO_ARGS, abs_diff, [](auto x, auto y) -> decltype(x) {
93  if constexpr (std::is_signed_v<decltype(x)>)
94  if ((x < 0) != (y < 0))
95  return std::abs(x) + std::abs(y);
96 
97  return std::max(x, y) - std::min(x, y);
98 })
99 
100 BUILTIN_GENINT_SU(TWO_ARGS, add_sat, [](auto x, auto y) -> decltype(x) {
101  using T = decltype(x);
102  if constexpr (std::is_signed_v<T>) {
103  if (x > 0 && y > 0)
104  return (x < (std::numeric_limits<T>::max() - y)
105  ? (x + y)
107  if (x < 0 && y < 0)
108  return (x > (std::numeric_limits<T>::min() - y)
109  ? (x + y)
111  return x + y;
112  } else {
113  return (x < (std::numeric_limits<T>::max() - y)
114  ? x + y
116  }
117 })
118 
119 BUILTIN_GENINT_SU(TWO_ARGS, hadd, [](auto x, auto y) -> decltype(x) {
120  const decltype(x) one = 1;
121  return (x >> one) + (y >> one) + ((y & x) & one);
122 })
123 
124 BUILTIN_GENINT_SU(TWO_ARGS, rhadd, [](auto x, auto y) -> decltype(x) {
125  const decltype(x) one = 1;
126  return (x >> one) + (y >> one) + ((y | x) & one);
127 })
128 
130  [](auto x, auto y, auto z) -> decltype(x) {
131  return sycl::mul_hi(x, y) + z;
132  })
133 
135  THREE_ARGS, mad_sat, [](auto a, auto b, auto c) -> decltype(a) {
136  using T = decltype(a);
137  if constexpr (std::is_signed_v<T>) {
138  if constexpr (sizeof(T) == 8) {
139  bool neg_prod = (a < 0) ^ (b < 0);
140  T mulhi = __s_long_mul_hi(a, b);
141 
142  // check mul_hi. If it is any value != 0.
143  // if prod is +ve, any value in mulhi means we need to saturate.
144  // if prod is -ve, any value in mulhi besides -1 means we need to
145  // saturate.
146  if (!neg_prod && mulhi != 0)
148  if (neg_prod && mulhi != -1)
149  return std::numeric_limits<T>::min(); // essentially some other
150  // negative value.
151  return sycl::add_sat(T(a * b), c);
152  } else {
153  using UPT = sycl::detail::make_larger_t<T>;
154  UPT mul = UPT(a) * UPT(b);
155  UPT res = mul + UPT(c);
156  const UPT max = std::numeric_limits<T>::max();
157  const UPT min = std::numeric_limits<T>::min();
158  res = std::min(std::max(res, min), max);
159  return T(res);
160  }
161  } else {
162  if constexpr (sizeof(T) == 8) {
163  T mulhi = __u_long_mul_hi(a, b);
164  // check mul_hi. If it is any value != 0.
165  if (mulhi != 0)
167  return sycl::add_sat(T(a * b), c);
168  } else {
169  using UPT = sycl::detail::make_larger_t<T>;
170  UPT mul = UPT(a) * UPT(b);
171  const UPT min = std::numeric_limits<T>::min();
172  const UPT max = std::numeric_limits<T>::max();
173  mul = std::min(std::max(mul, min), max);
174  return sycl::add_sat(T(mul), c);
175  }
176  }
177  })
178 
179 BUILTIN_GENINT_SU(TWO_ARGS, mul_hi, [](auto a, auto b) -> decltype(a) {
180  using T = decltype(a);
181  if constexpr (sizeof(T) == 8) {
182  if constexpr (std::is_signed_v<T>)
183  return __s_long_mul_hi(a, b);
184  else
185  return __u_long_mul_hi(a, b);
186  } else {
187  using UPT = sycl::detail::make_larger_t<T>;
188  UPT a_s = a;
189  UPT b_s = b;
190  UPT mul = a_s * b_s;
191  return (mul >> (sizeof(T) * 8));
192  }
193 })
194 
195 BUILTIN_GENINT_SU(TWO_ARGS, sub_sat, [](auto x, auto y) -> decltype(x) {
196  using T = decltype(x);
197  if constexpr (std::is_signed_v<T>) {
198  using UT = std::make_unsigned_t<T>;
199  T result = UT(x) - UT(y);
200  // Saturate result if (+) - (-) = (-) or (-) - (+) = (+).
201  if (((x < 0) ^ (y < 0)) && ((x < 0) ^ (result < 0)))
204  return result;
205  } else {
206  return (y < (x - std::numeric_limits<T>::min()))
207  ? (x - y)
209  }
210 })
211 
213  [](auto x, auto y) -> decltype(x) { return x < y ? y : x; })
214 
216  [](auto x, auto y) -> decltype(x) { return y < x ? y : x; })
217 
218 BUILTIN_GENINT_SU(THREE_ARGS, clamp, [](auto x, auto y, auto z) -> decltype(x) {
219  return std::min(std::max(x, y), z);
220 })
221 
222 template <typename T> static inline constexpr T __clz_impl(T x, T m, T n = 0) {
223  return (x & m) ? n : __clz_impl(x, T(m >> 1), ++n);
224 }
225 template <typename T> static inline constexpr T __clz(T x) {
226  using UT = std::make_unsigned_t<T>;
227  return (x == T(0)) ? sizeof(T) * 8
228  : __clz_impl<UT>(x, sycl::detail::msbMask<UT>(x));
229 }
230 BUILTIN_GENINT(ONE_ARG, clz, __clz)
231 
232 template <typename T> static inline constexpr T __ctz_impl(T x, T m, T n = 0) {
233  return (x & m) ? n : __ctz_impl(x, T(m << 1), ++n);
234 }
235 
236 template <typename T> static inline constexpr T __ctz(T x) {
237  using UT = std::make_unsigned_t<T>;
238  return (x == T(0)) ? sizeof(T) * 8 : __ctz_impl<UT>(x, 1);
239 }
240 BUILTIN_GENINT(ONE_ARG, ctz, __ctz)
241 
242 BUILTIN_GENINT(TWO_ARGS, rotate, [](auto x, auto n) -> decltype(x) {
243  using T = decltype(x);
244  using UT = std::make_unsigned_t<T>;
245  // Shrink the shift width so that it's in the range [0, num_bits(T)). Cast
246  // everything to unsigned to avoid type conversion issues.
247  constexpr UT size = sizeof(x) * 8;
248  UT xu = UT(x);
249  UT nu = UT(n) & (size - 1);
250  return (xu << nu) | (xu >> (size - nu));
251 })
252 
253 template <typename T>
254 static inline constexpr T __popcount_impl(T x, size_t n = 0) {
255  return (x == T(0)) ? n : __popcount_impl(x >> 1, ((x & T(1)) ? ++n : n));
256 }
257 template <typename T> static inline constexpr T __popcount(T x) {
258  using UT = sycl::detail::make_unsigned_t<T>;
259  return __popcount_impl(UT(x));
260 }
261 BUILTIN_GENINT(ONE_ARG, popcount, __popcount)
262 } // namespace _V1
263 } // namespace sycl
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
Definition: math.hpp:134
std::int32_t cl_int
Definition: aliases.hpp:134
static constexpr T __clz_impl(T x, T m, T n=0)
static constexpr T __ctz(T x)
return std::max(x, y) - std hadd
auto autodecltype(a) b
auto auto autodecltype(x) z
return std::max(x, y) - std BUILTIN_GENINT_SU(TWO_ARGS, add_sat, [](auto x, auto y) -> decltype(x) { using T=decltype(x);if constexpr(std::is_signed_v< T >) { if(x > 0 &&y > 0) return(x<(std::numeric_limits< T >::max() - y) ?(x+y) :std::numeric_limits< T >::max());if(x< 0 &&y< 0) return(x >(std::numeric_limits< T >::min() - y) ?(x+y) :std::numeric_limits< T >::min());return x+y;} else { return(x<(std::numeric_limits< T >::max() - y) ? x+y :std::numeric_limits< T >::max());} }) BUILTIN_GENINT_SU(TWO_ARGS
BUILTIN_GENINT(ONE_ARG, abs, [](auto x) -> decltype(x) { if constexpr(std::is_signed_v< decltype(x)>) { return std::abs(x);} else { return x;} }) BUILTIN_GENINT_SU(TWO_ARGS
autodecltype(x) x
static constexpr T __ctz_impl(T x, T m, T n=0)
static constexpr T __clz(T x)
Definition: access.hpp:18
int popcount(const simd_mask< _Tp, _Abi > &) noexcept