// Combines the four partial products a0b0, a0b1, a1b0 and a1b1 into one value
// of type T. The callers in this file store that value as the result of their
// "mul hi" computation.
// NOTE(review): this listing omits several lines of the body; only the visible
// statements are described.
15 template <
typename T>
inline T __get_high_half(T a0b0, T a0b1, T a1b0, T a1b1) {
// Half of the bit width of T (sizeof(T) * 8 bits, divided by two).
16 constexpr
int halfsize = (
sizeof(T) * 8) / 2;
// Drops the low half of a0b0, adds what remains to a0b1, combines that sum
// with a1b0 through sycl::hadd and shifts the outcome right by halfsize - 1.
// Presumably hadd is used so the cross terms can be summed without
// overflowing T, with the shift reduced by one bit to compensate for the
// halving - TODO confirm against the full source.
22 (
sycl::hadd(a1b0, (a0b1 + (a0b0 >> halfsize))) >> (halfsize - 1));
// Takes a and b by value and reports its results through the four reference
// parameters a0b0, a0b1, a1b0 and a1b1.
// NOTE(review): the template header, the definition of halfsize and the
// statements that fill the outputs are not visible in this listing; presumably
// the outputs are the pairwise products of the low/high halves of a and b.
27 inline void __get_half_products(T
a, T
b, T &a0b0, T &a0b1, T &a1b0, T &a1b1) {
// Shift left then right by halfsize. For an unsigned T that is at least as
// wide as int this clears the upper halfsize bits and leaves the low half.
30 T a0 = (a << halfsize) >> halfsize;
32 T b0 = (b << halfsize) >> halfsize;
// Helper used by the unsigned 64-bit paths of the builtins below (see the
// calls guarded by sizeof(T) == 8). The visible part obtains the four partial
// products of a and b and combines them with __get_high_half.
// NOTE(review): the remainder of the body, including the return statement, is
// not visible in this listing.
44 template <
typename T>
inline T __u_long_mul_hi(T
a, T
b) {
// Outputs of __get_half_products, which receives them by reference.
45 T a0b0, a0b1, a1b0, a1b1;
46 __get_half_products(
a,
b, a0b0, a0b1, a1b0, a1b1);
47 T result = __get_high_half(a0b0, a0b1, a1b0, a1b1);
// Signed counterpart of __u_long_mul_hi. The visible part works on unsigned
// values (absA, absB) and then checks whether the operands have opposite
// signs.
// NOTE(review): the definitions of absA and absB and the end of the negative
// branch are not visible in this listing; presumably absA/absB are the
// magnitudes of a and b - confirm against the full source.
51 template <
typename T>
inline T __s_long_mul_hi(T
a, T
b) {
52 using UT = std::make_unsigned_t<T>;
// Partial products are held in the unsigned type.
56 UT a0b0, a0b1, a1b0, a1b1;
57 __get_half_products(absA, absB, a0b0, a0b1, a1b0, a1b1);
58 T result = __get_high_half(a0b0, a0b1, a1b0, a1b1);
// True exactly when one operand is negative and the other is not.
60 bool isResultNegative = (
a < 0) != (
b < 0);
61 if (isResultNegative) {
// Half of the bit width of T.
65 constexpr
int halfsize = (
sizeof(T) * 8) / 2;
// Unsigned arithmetic, so this sum wraps modulo 2^(bits of UT). Presumably it
// is the low word of the product, needed to adjust result for a negative
// product - the adjustment itself is not visible here.
66 UT low = a0b0 + ((a0b1 + a1b0) << halfsize);
76 inline namespace _V1 {
// BUILTIN_GENINT(NUM_ARGS, NAME, IMPL) expands to three macro invocations:
// HOST_IMPL(NAME, IMPL), FOR_EACH2(EXPORT_SCALAR, ...) over INTEGER_TYPES, and
// EXPORT_VEC_1_16(...) over FIXED_WIDTH_INTEGER_TYPES. Those helper macros are
// defined elsewhere; presumably they define the host implementation and
// export scalar and vector overloads - confirm at their definitions.
77 #define BUILTIN_GENINT(NUM_ARGS, NAME, IMPL) \
78 HOST_IMPL(NAME, IMPL) \
79 FOR_EACH2(EXPORT_SCALAR, NUM_ARGS, NAME, INTEGER_TYPES) \
80 EXPORT_VEC_1_16(NUM_ARGS, NAME, FIXED_WIDTH_INTEGER_TYPES)
// BUILTIN_GENINT_SU forwards its arguments to BUILTIN_GENINT unchanged.
81 #define BUILTIN_GENINT_SU(NUM_ARGS, NAME, IMPL) \
82 BUILTIN_GENINT(NUM_ARGS, NAME, IMPL)
// Compile-time branch on whether the type of x is signed. The one-line copy of
// the abs builtin further down in this listing shows std::abs(x) being
// returned for signed types and x itself for unsigned types.
85 if constexpr (std::is_signed_v<decltype(
x)>) {
// Fragment of a two-argument builtin (presumably abs_diff - the enclosing
// macro invocation is not visible in this listing).
// For signed types, operands with opposite signs take a separate path whose
// body is not visible here.
93 if constexpr (std::is_signed_v<decltype(
x)>)
94 if ((
x < 0) != (
y < 0))
// Otherwise the result is the larger operand minus the smaller one.
97 return std::max(
x,
y) - std::min(
x,
y);
// Opening of a builtin body that branches on the signedness of T = decltype(x).
// It matches the start of the add_sat lambda reproduced on one line further
// down in this listing; the branches themselves are not visible here.
101 using T = decltype(
x);
102 if constexpr (std::is_signed_v<T>) {
// Fragment of a two-argument builtin (presumably hadd - the enclosing macro
// invocation is not visible in this listing).
// Halves each operand first and then adds 1 only when both operands have
// their lowest bit set. For non-negative values this equals (x + y) / 2
// rounded down, without ever forming x + y.
120 const decltype(
x) one = 1;
121 return (
x >> one) + (
y >> one) + ((
y &
x) & one);
// Fragment of a two-argument builtin (presumably rhadd - the enclosing macro
// invocation is not visible in this listing).
// Same structure as the preceding fragment, but the correction term is 1 when
// either operand has its lowest bit set. For non-negative values this equals
// (x + y + 1) / 2, i.e. the average rounded up, without ever forming x + y.
125 const decltype(
x) one = 1;
126 return (
x >> one) + (
y >> one) + ((
y |
x) & one);
// Header of a generic three-argument lambda whose return type is the type of
// its first argument x. Its body and the enclosing macro invocation are not
// visible in this listing.
130 [](
auto x,
auto y,
auto z) -> decltype(
x) {
// mad_sat: generic lambda over (a, b, c) returning the type of a. Four paths
// are visible, selected at compile time by signedness and by whether T is
// 8 bytes wide. Several lines (the early returns and the min/max definitions)
// are missing from this listing.
135 THREE_ARGS, mad_sat, [](
auto a,
auto b,
auto c) -> decltype(
a) {
136 using T = decltype(
a);
137 if constexpr (std::is_signed_v<T>) {
138 if constexpr (
sizeof(T) == 8) {
// Signed 8-byte path: no wider type is used here; the product is inspected
// through __s_long_mul_hi instead.
// neg_prod is true when exactly one of a and b is negative.
139 bool neg_prod = (
a < 0) ^ (
b < 0);
140 T
mulhi = __s_long_mul_hi(
a,
b);
// The statements guarded by the next two conditions are not visible here;
// presumably they return the saturated limits - TODO confirm.
// NOTE(review): neg_prod is also true when one operand is 0 and the other is
// negative, although the product is then 0; verify that the mulhi != -1 test
// does not misclassify that case.
146 if (!neg_prod &&
mulhi != 0)
148 if (neg_prod &&
mulhi != -1)
// Neither condition held: add c to the truncated product with saturation.
151 return sycl::add_sat(T(
a *
b), c);
// Signed path for narrower types: do the arithmetic in the larger type UPT.
153 using UPT = sycl::detail::make_larger_t<T>;
154 UPT
mul = UPT(
a) * UPT(
b);
155 UPT res =
mul + UPT(c);
// Clamp res to the range [min, max]; the definitions of min and max are not
// visible in this listing.
158 res = std::min(std::max(res,
min),
max);
162 if constexpr (
sizeof(T) == 8) {
// Unsigned 8-byte path; how mulhi is used is not visible in this listing.
163 T
mulhi = __u_long_mul_hi(
a,
b);
167 return sycl::add_sat(T(
a *
b), c);
// Unsigned path for narrower types: multiply in the larger type UPT, then
// add c with saturation. The lines between these statements are not visible.
169 using UPT = sycl::detail::make_larger_t<T>;
170 UPT
mul = UPT(
a) * UPT(
b);
174 return sycl::add_sat(T(
mul), c);
// Fragment of a two-argument builtin (presumably mul_hi - the enclosing macro
// invocation is not visible in this listing).
180 using T = decltype(
a);
// 8-byte types: delegate to the signed or unsigned long-multiply helper.
181 if constexpr (
sizeof(T) == 8) {
182 if constexpr (std::is_signed_v<T>)
183 return __s_long_mul_hi(
a,
b);
185 return __u_long_mul_hi(
a,
b);
// Narrower types: UPT is the next larger type. The definition of mul is not
// visible here; the result is mul shifted right by the bit width of T.
187 using UPT = sycl::detail::make_larger_t<T>;
191 return (
mul >> (
sizeof(T) * 8));
// Fragment of a two-argument builtin (presumably sub_sat - the enclosing macro
// invocation is not visible in this listing).
196 using T = decltype(
x);
197 if constexpr (std::is_signed_v<T>) {
198 using UT = std::make_unsigned_t<T>;
// Subtract in the unsigned type, then convert the difference back to T.
199 T result = UT(
x) - UT(
y);
// True when x and y have different signs and the sign of result differs from
// the sign of x. The statements guarded by this test are not visible here.
201 if (((
x < 0) ^ (
y < 0)) && ((
x < 0) ^ (result < 0)))
// Generic lambda that returns y when x < y and x otherwise, converted to the
// type of x. Presumably the implementation of the max builtin - the enclosing
// macro invocation is not visible in this listing.
213 [](
auto x,
auto y) -> decltype(
x) {
return x <
y ?
y :
x; })
// Generic lambda that returns y when y < x and x otherwise, converted to the
// type of x. Presumably the implementation of the min builtin - the enclosing
// macro invocation is not visible in this listing.
216 [](
auto x,
auto y) -> decltype(
x) {
return y <
x ?
y :
x; })
// Takes the larger of x and y, then the smaller of that value and z.
// Presumably the body of the clamp builtin with y as lower and z as upper
// bound - the enclosing definition is not visible in this listing.
219 return std::min(std::max(
x,
y),
z);
// Worker behind __clz. __clz calls it with the value converted to the unsigned
// type and with sycl::detail::msbMask as m; n defaults to 0.
// NOTE(review): the body is not visible in this listing.
222 template <typename T>
static inline constexpr T
__clz_impl(T
x, T m, T n = 0) {
// Returns the bit width of T when x is zero; otherwise returns the outcome of
// __clz_impl run on the unsigned type UT with the mask produced by
// sycl::detail::msbMask<UT>(x). Presumably the count of leading zero bits.
225 template <
typename T>
static inline constexpr T
__clz(T
x) {
226 using UT = std::make_unsigned_t<T>;
227 return (
x == T(0)) ?
sizeof(T) * 8
228 : __clz_impl<UT>(
x, sycl::detail::msbMask<UT>(
x));
// Worker behind __ctz. __ctz calls it with the value converted to the unsigned
// type and with m = 1; n defaults to 0.
// NOTE(review): the body is not visible in this listing.
232 template <typename T> static inline constexpr T
__ctz_impl(T
x, T m, T n = 0) {
// Returns the bit width of T when x is zero; otherwise returns the outcome of
// __ctz_impl run on the unsigned type UT with an initial mask of 1.
// Presumably the count of trailing zero bits.
236 template <
typename T>
static inline constexpr T
__ctz(T
x) {
237 using UT = std::make_unsigned_t<T>;
238 return (
x == T(0)) ?
sizeof(T) * 8 : __ctz_impl<UT>(
x, 1);
// Fragment of a builtin that rotates the bits of x left by n (presumably
// rotate - the enclosing macro invocation and the definition of xu are not
// visible in this listing).
243 using T = decltype(
x);
244 using UT = std::make_unsigned_t<T>;
// Bit width of x.
247 constexpr UT size =
sizeof(
x) * 8;
// Keep only the low bits of n so the count lies in [0, size); masking with
// size - 1 assumes size is a power of two.
249 UT nu = UT(n) & (size - 1);
// NOTE(review): when nu is 0 the right shift count is size. For operands that
// are not narrower than int, a shift by the full width is undefined in C++ -
// confirm this case is safe for 32- and 64-bit types.
250 return (xu << nu) | (xu >> (size - nu));
// Recursive bit counter: n carries the number of set bits seen so far and
// starts at 0.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
253 template <typename T>
254 static inline constexpr T __popcount_impl(T
x,
size_t n = 0) {
// Stop when no bits remain. Otherwise recurse on x shifted right by one,
// with n incremented when the lowest bit of x is set.
255 return (
x == T(0)) ? n : __popcount_impl(
x >> 1, ((
x & T(1)) ? ++n : n));
// Entry point for the bit count: converts x to its unsigned counterpart
// before handing it to __popcount_impl. Presumably the conversion ensures the
// right shifts in __popcount_impl eventually reach zero for negative inputs.
257 template <
typename T>
static inline constexpr T __popcount(T
x) {
258 using UT = sycl::detail::make_unsigned_t<T>;
259 return __popcount_impl(UT(
x));
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
static constexpr T __clz_impl(T x, T m, T n=0)
static constexpr T __ctz(T x)
return std::max(x, y) - std hadd
auto auto autodecltype(x) z
return std::max(x, y) - std BUILTIN_GENINT_SU(TWO_ARGS, add_sat, [](auto x, auto y) -> decltype(x) { using T=decltype(x);if constexpr(std::is_signed_v< T >) { if(x > 0 &&y > 0) return(x<(std::numeric_limits< T >::max() - y) ?(x+y) :std::numeric_limits< T >::max());if(x< 0 &&y< 0) return(x >(std::numeric_limits< T >::min() - y) ?(x+y) :std::numeric_limits< T >::min());return x+y;} else { return(x<(std::numeric_limits< T >::max() - y) ? x+y :std::numeric_limits< T >::max());} }) BUILTIN_GENINT_SU(TWO_ARGS
BUILTIN_GENINT(ONE_ARG, abs, [](auto x) -> decltype(x) { if constexpr(std::is_signed_v< decltype(x)>) { return std::abs(x);} else { return x;} }) BUILTIN_GENINT_SU(TWO_ARGS
static constexpr T __ctz_impl(T x, T m, T n=0)
static constexpr T __clz(T x)
int popcount(const simd_mask< _Tp, _Abi > &) noexcept