clang  19.0.0git
tmmintrin.h
Go to the documentation of this file.
1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __TMMINTRIN_H
11 #define __TMMINTRIN_H
12 
/* Refuse to compile unless one of the x86 target macros is defined. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16 
17 #include <pmmintrin.h>
18 
/* Attribute sets applied to the intrinsics defined in this header.
 * __DEFAULT_FN_ATTRS is used by the 128-bit (__m128i) functions;
 * __DEFAULT_FN_ATTRS_MMX additionally lists "mmx" in its target string and is
 * used by the 64-bit (__m64) functions. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("ssse3,no-evex512"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("mmx,ssse3,no-evex512"),                           \
                 __min_vector_width__(64)))
27 
28 /// Computes the absolute value of each of the packed 8-bit signed
29 /// integers in the source operand and stores the 8-bit unsigned integer
30 /// results in the destination.
31 ///
32 /// \headerfile <x86intrin.h>
33 ///
34 /// This intrinsic corresponds to the \c PABSB instruction.
35 ///
36 /// \param __a
37 /// A 64-bit vector of [8 x i8].
38 /// \returns A 64-bit integer vector containing the absolute values of the
39 /// elements in the operand.
40 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
42 {
43  return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
44 }
45 
46 /// Computes the absolute value of each of the packed 8-bit signed
47 /// integers in the source operand and stores the 8-bit unsigned integer
48 /// results in the destination.
49 ///
50 /// \headerfile <x86intrin.h>
51 ///
52 /// This intrinsic corresponds to the \c VPABSB instruction.
53 ///
54 /// \param __a
55 /// A 128-bit vector of [16 x i8].
56 /// \returns A 128-bit integer vector containing the absolute values of the
57 /// elements in the operand.
58 static __inline__ __m128i __DEFAULT_FN_ATTRS
59 _mm_abs_epi8(__m128i __a)
60 {
61  return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
62 }
63 
64 /// Computes the absolute value of each of the packed 16-bit signed
65 /// integers in the source operand and stores the 16-bit unsigned integer
66 /// results in the destination.
67 ///
68 /// \headerfile <x86intrin.h>
69 ///
70 /// This intrinsic corresponds to the \c PABSW instruction.
71 ///
72 /// \param __a
73 /// A 64-bit vector of [4 x i16].
74 /// \returns A 64-bit integer vector containing the absolute values of the
75 /// elements in the operand.
76 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
78 {
79  return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
80 }
81 
82 /// Computes the absolute value of each of the packed 16-bit signed
83 /// integers in the source operand and stores the 16-bit unsigned integer
84 /// results in the destination.
85 ///
86 /// \headerfile <x86intrin.h>
87 ///
88 /// This intrinsic corresponds to the \c VPABSW instruction.
89 ///
90 /// \param __a
91 /// A 128-bit vector of [8 x i16].
92 /// \returns A 128-bit integer vector containing the absolute values of the
93 /// elements in the operand.
94 static __inline__ __m128i __DEFAULT_FN_ATTRS
96 {
97  return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
98 }
99 
100 /// Computes the absolute value of each of the packed 32-bit signed
101 /// integers in the source operand and stores the 32-bit unsigned integer
102 /// results in the destination.
103 ///
104 /// \headerfile <x86intrin.h>
105 ///
106 /// This intrinsic corresponds to the \c PABSD instruction.
107 ///
108 /// \param __a
109 /// A 64-bit vector of [2 x i32].
110 /// \returns A 64-bit integer vector containing the absolute values of the
111 /// elements in the operand.
112 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
114 {
115  return (__m64)__builtin_ia32_pabsd((__v2si)__a);
116 }
117 
118 /// Computes the absolute value of each of the packed 32-bit signed
119 /// integers in the source operand and stores the 32-bit unsigned integer
120 /// results in the destination.
121 ///
122 /// \headerfile <x86intrin.h>
123 ///
124 /// This intrinsic corresponds to the \c VPABSD instruction.
125 ///
126 /// \param __a
127 /// A 128-bit vector of [4 x i32].
128 /// \returns A 128-bit integer vector containing the absolute values of the
129 /// elements in the operand.
130 static __inline__ __m128i __DEFAULT_FN_ATTRS
132 {
133  return (__m128i)__builtin_elementwise_abs((__v4si)__a);
134 }
135 
136 /// Concatenates the two 128-bit integer vector operands, and
137 /// right-shifts the result by the number of bytes specified in the immediate
138 /// operand.
139 ///
140 /// \headerfile <x86intrin.h>
141 ///
142 /// \code
143 /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
144 /// \endcode
145 ///
146 /// This intrinsic corresponds to the \c PALIGNR instruction.
147 ///
148 /// \param a
149 /// A 128-bit vector of [16 x i8] containing one of the source operands.
150 /// \param b
151 /// A 128-bit vector of [16 x i8] containing one of the source operands.
152 /// \param n
153 /// An immediate operand specifying how many bytes to right-shift the result.
154 /// \returns A 128-bit integer vector containing the concatenated right-shifted
155 /// value.
/* Function-like macro: a and b are converted to __m128i, viewed as
 * [16 x i8], and passed with the byte count n to the PALIGNR builtin. */
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (n)))
159 
160 /// Concatenates the two 64-bit integer vector operands, and right-shifts
161 /// the result by the number of bytes specified in the immediate operand.
162 ///
163 /// \headerfile <x86intrin.h>
164 ///
165 /// \code
166 /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
167 /// \endcode
168 ///
169 /// This intrinsic corresponds to the \c PALIGNR instruction.
170 ///
171 /// \param a
172 /// A 64-bit vector of [8 x i8] containing one of the source operands.
173 /// \param b
174 /// A 64-bit vector of [8 x i8] containing one of the source operands.
175 /// \param n
176 /// An immediate operand specifying how many bytes to right-shift the result.
177 /// \returns A 64-bit integer vector containing the concatenated right-shifted
178 /// value.
/* Function-like macro: a and b are converted to __m64, viewed as [8 x i8],
 * and passed with the byte count n to the 64-bit PALIGNR builtin. */
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
181 
182 /// Horizontally adds the adjacent pairs of values contained in 2 packed
183 /// 128-bit vectors of [8 x i16].
184 ///
185 /// \headerfile <x86intrin.h>
186 ///
187 /// This intrinsic corresponds to the \c VPHADDW instruction.
188 ///
189 /// \param __a
190 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
191 /// horizontal sums of the values are stored in the lower bits of the
192 /// destination.
193 /// \param __b
194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
195 /// horizontal sums of the values are stored in the upper bits of the
196 /// destination.
197 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
198 /// both operands.
199 static __inline__ __m128i __DEFAULT_FN_ATTRS
200 _mm_hadd_epi16(__m128i __a, __m128i __b)
201 {
202  return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
203 }
204 
205 /// Horizontally adds the adjacent pairs of values contained in 2 packed
206 /// 128-bit vectors of [4 x i32].
207 ///
208 /// \headerfile <x86intrin.h>
209 ///
210 /// This intrinsic corresponds to the \c VPHADDD instruction.
211 ///
212 /// \param __a
213 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
214 /// horizontal sums of the values are stored in the lower bits of the
215 /// destination.
216 /// \param __b
217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
218 /// horizontal sums of the values are stored in the upper bits of the
219 /// destination.
220 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
221 /// both operands.
222 static __inline__ __m128i __DEFAULT_FN_ATTRS
223 _mm_hadd_epi32(__m128i __a, __m128i __b)
224 {
225  return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
226 }
227 
228 /// Horizontally adds the adjacent pairs of values contained in 2 packed
229 /// 64-bit vectors of [4 x i16].
230 ///
231 /// \headerfile <x86intrin.h>
232 ///
233 /// This intrinsic corresponds to the \c PHADDW instruction.
234 ///
235 /// \param __a
236 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
237 /// horizontal sums of the values are stored in the lower bits of the
238 /// destination.
239 /// \param __b
240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
241 /// horizontal sums of the values are stored in the upper bits of the
242 /// destination.
243 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
244 /// operands.
245 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
246 _mm_hadd_pi16(__m64 __a, __m64 __b)
247 {
248  return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
249 }
250 
251 /// Horizontally adds the adjacent pairs of values contained in 2 packed
252 /// 64-bit vectors of [2 x i32].
253 ///
254 /// \headerfile <x86intrin.h>
255 ///
256 /// This intrinsic corresponds to the \c PHADDD instruction.
257 ///
258 /// \param __a
259 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
260 /// horizontal sums of the values are stored in the lower bits of the
261 /// destination.
262 /// \param __b
263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
264 /// horizontal sums of the values are stored in the upper bits of the
265 /// destination.
266 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
267 /// operands.
268 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
269 _mm_hadd_pi32(__m64 __a, __m64 __b)
270 {
271  return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
272 }
273 
274 /// Horizontally adds, with saturation, the adjacent pairs of values contained
275 /// in two packed 128-bit vectors of [8 x i16].
276 ///
277 /// Sums greater than 32767 (0x7FFF) are saturated to 0x7FFF. Sums less than
278 /// -32768 (0x8000 as a signed 16-bit value) are saturated to 0x8000.
279 ///
280 /// \headerfile <x86intrin.h>
281 ///
282 /// This intrinsic corresponds to the \c VPHADDSW instruction.
283 ///
284 /// \param __a
285 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
286 /// horizontal sums of the values are stored in the lower bits of the
287 /// destination.
288 /// \param __b
289 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
290 /// horizontal sums of the values are stored in the upper bits of the
291 /// destination.
292 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
293 /// sums of both operands.
294 static __inline__ __m128i __DEFAULT_FN_ATTRS
295 _mm_hadds_epi16(__m128i __a, __m128i __b)
296 {
297  return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
298 }
299 
300 /// Horizontally adds, with saturation, the adjacent pairs of values contained
301 /// in two packed 64-bit vectors of [4 x i16].
302 ///
303 /// Sums greater than 32767 (0x7FFF) are saturated to 0x7FFF. Sums less than
304 /// -32768 (0x8000 as a signed 16-bit value) are saturated to 0x8000.
305 ///
306 /// \headerfile <x86intrin.h>
307 ///
308 /// This intrinsic corresponds to the \c PHADDSW instruction.
309 ///
310 /// \param __a
311 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
312 /// horizontal sums of the values are stored in the lower bits of the
313 /// destination.
314 /// \param __b
315 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
316 /// horizontal sums of the values are stored in the upper bits of the
317 /// destination.
318 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319 /// sums of both operands.
320 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
321 _mm_hadds_pi16(__m64 __a, __m64 __b)
322 {
323  return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324 }
325 
326 /// Horizontally subtracts the adjacent pairs of values contained in 2
327 /// packed 128-bit vectors of [8 x i16].
328 ///
329 /// \headerfile <x86intrin.h>
330 ///
331 /// This intrinsic corresponds to the \c VPHSUBW instruction.
332 ///
333 /// \param __a
334 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
335 /// horizontal differences between the values are stored in the lower bits of
336 /// the destination.
337 /// \param __b
338 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
339 /// horizontal differences between the values are stored in the upper bits of
340 /// the destination.
341 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342 /// of both operands.
343 static __inline__ __m128i __DEFAULT_FN_ATTRS
344 _mm_hsub_epi16(__m128i __a, __m128i __b)
345 {
346  return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347 }
348 
349 /// Horizontally subtracts the adjacent pairs of values contained in 2
350 /// packed 128-bit vectors of [4 x i32].
351 ///
352 /// \headerfile <x86intrin.h>
353 ///
354 /// This intrinsic corresponds to the \c VPHSUBD instruction.
355 ///
356 /// \param __a
357 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
358 /// horizontal differences between the values are stored in the lower bits of
359 /// the destination.
360 /// \param __b
361 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
362 /// horizontal differences between the values are stored in the upper bits of
363 /// the destination.
364 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365 /// of both operands.
366 static __inline__ __m128i __DEFAULT_FN_ATTRS
367 _mm_hsub_epi32(__m128i __a, __m128i __b)
368 {
369  return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370 }
371 
372 /// Horizontally subtracts the adjacent pairs of values contained in 2
373 /// packed 64-bit vectors of [4 x i16].
374 ///
375 /// \headerfile <x86intrin.h>
376 ///
377 /// This intrinsic corresponds to the \c PHSUBW instruction.
378 ///
379 /// \param __a
380 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
381 /// horizontal differences between the values are stored in the lower bits of
382 /// the destination.
383 /// \param __b
384 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
385 /// horizontal differences between the values are stored in the upper bits of
386 /// the destination.
387 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388 /// of both operands.
389 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
390 _mm_hsub_pi16(__m64 __a, __m64 __b)
391 {
392  return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393 }
394 
395 /// Horizontally subtracts the adjacent pairs of values contained in 2
396 /// packed 64-bit vectors of [2 x i32].
397 ///
398 /// \headerfile <x86intrin.h>
399 ///
400 /// This intrinsic corresponds to the \c PHSUBD instruction.
401 ///
402 /// \param __a
403 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
404 /// horizontal differences between the values are stored in the lower bits of
405 /// the destination.
406 /// \param __b
407 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
408 /// horizontal differences between the values are stored in the upper bits of
409 /// the destination.
410 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411 /// of both operands.
412 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
413 _mm_hsub_pi32(__m64 __a, __m64 __b)
414 {
415  return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416 }
417 
418 /// Horizontally subtracts, with saturation, the adjacent pairs of values
419 /// contained in two packed 128-bit vectors of [8 x i16].
420 ///
421 /// Differences greater than 32767 (0x7FFF) are saturated to 0x7FFF.
422 /// Differences less than -32768 (0x8000 as a signed 16-bit value) are
422 /// saturated to 0x8000.
423 ///
424 /// \headerfile <x86intrin.h>
425 ///
426 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
427 ///
428 /// \param __a
429 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
430 /// horizontal differences between the values are stored in the lower bits of
431 /// the destination.
432 /// \param __b
433 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
434 /// horizontal differences between the values are stored in the upper bits of
435 /// the destination.
436 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
437 /// differences of both operands.
438 static __inline__ __m128i __DEFAULT_FN_ATTRS
439 _mm_hsubs_epi16(__m128i __a, __m128i __b)
440 {
441  return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
442 }
443 
444 /// Horizontally subtracts, with saturation, the adjacent pairs of values
445 /// contained in two packed 64-bit vectors of [4 x i16].
446 ///
447 /// Differences greater than 32767 (0x7FFF) are saturated to 0x7FFF.
448 /// Differences less than -32768 (0x8000 as a signed 16-bit value) are
448 /// saturated to 0x8000.
449 ///
450 /// \headerfile <x86intrin.h>
451 ///
452 /// This intrinsic corresponds to the \c PHSUBSW instruction.
453 ///
454 /// \param __a
455 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
456 /// horizontal differences between the values are stored in the lower bits of
457 /// the destination.
458 /// \param __b
459 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
460 /// horizontal differences between the values are stored in the upper bits of
461 /// the destination.
462 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
463 /// differences of both operands.
464 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
465 _mm_hsubs_pi16(__m64 __a, __m64 __b)
466 {
467  return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
468 }
469 
470 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
471 /// values contained in the first source operand and packed 8-bit signed
472 /// integer values contained in the second source operand, adds pairs of
473 /// contiguous products with signed saturation, and writes the 16-bit sums to
474 /// the corresponding bits in the destination.
475 ///
476 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
477 /// both operands are multiplied, and the sum of both results is written to
478 /// bits [15:0] of the destination.
479 ///
480 /// \headerfile <x86intrin.h>
481 ///
482 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
483 ///
484 /// \param __a
485 /// A 128-bit integer vector containing the first source operand.
486 /// \param __b
487 /// A 128-bit integer vector containing the second source operand.
488 /// \returns A 128-bit integer vector containing the sums of products of both
489 /// operands: \n
490 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
491 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
492 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
493 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
494 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
495 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
496 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
497 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
498 static __inline__ __m128i __DEFAULT_FN_ATTRS
499 _mm_maddubs_epi16(__m128i __a, __m128i __b)
500 {
501  return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
502 }
503 
504 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
505 /// values contained in the first source operand and packed 8-bit signed
506 /// integer values contained in the second source operand, adds pairs of
507 /// contiguous products with signed saturation, and writes the 16-bit sums to
508 /// the corresponding bits in the destination.
509 ///
510 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
511 /// both operands are multiplied, and the sum of both results is written to
512 /// bits [15:0] of the destination.
513 ///
514 /// \headerfile <x86intrin.h>
515 ///
516 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
517 ///
518 /// \param __a
519 /// A 64-bit integer vector containing the first source operand.
520 /// \param __b
521 /// A 64-bit integer vector containing the second source operand.
522 /// \returns A 64-bit integer vector containing the sums of products of both
523 /// operands: \n
524 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
525 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
526 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
527 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
528 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
529 _mm_maddubs_pi16(__m64 __a, __m64 __b)
530 {
531  return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
532 }
533 
534 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
535 /// products to the 18 most significant bits by right-shifting, rounds the
536 /// truncated value by adding 1, and writes bits [16:1] to the destination.
537 ///
538 /// \headerfile <x86intrin.h>
539 ///
540 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
541 ///
542 /// \param __a
543 /// A 128-bit vector of [8 x i16] containing one of the source operands.
544 /// \param __b
545 /// A 128-bit vector of [8 x i16] containing one of the source operands.
546 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
547 /// products of both operands.
548 static __inline__ __m128i __DEFAULT_FN_ATTRS
549 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
550 {
551  return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
552 }
553 
554 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
555 /// products to the 18 most significant bits by right-shifting, rounds the
556 /// truncated value by adding 1, and writes bits [16:1] to the destination.
557 ///
558 /// \headerfile <x86intrin.h>
559 ///
560 /// This intrinsic corresponds to the \c PMULHRSW instruction.
561 ///
562 /// \param __a
563 /// A 64-bit vector of [4 x i16] containing one of the source operands.
564 /// \param __b
565 /// A 64-bit vector of [4 x i16] containing one of the source operands.
566 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
567 /// products of both operands.
568 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
569 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
570 {
571  return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
572 }
573 
574 /// Copies the 8-bit integers from a 128-bit integer vector to the
575 /// destination or clears 8-bit values in the destination, as specified by
576 /// the second source operand.
577 ///
578 /// \headerfile <x86intrin.h>
579 ///
580 /// This intrinsic corresponds to the \c VPSHUFB instruction.
581 ///
582 /// \param __a
583 /// A 128-bit integer vector containing the values to be copied.
584 /// \param __b
585 /// A 128-bit integer vector containing control bytes corresponding to
586 /// positions in the destination:
587 /// Bit 7: \n
588 /// 1: Clear the corresponding byte in the destination. \n
589 /// 0: Copy the selected source byte to the corresponding byte in the
590 /// destination. \n
591 /// Bits [6:4] Reserved. \n
592 /// Bits [3:0] select the source byte to be copied.
593 /// \returns A 128-bit integer vector containing the copied or cleared values.
594 static __inline__ __m128i __DEFAULT_FN_ATTRS
595 _mm_shuffle_epi8(__m128i __a, __m128i __b)
596 {
597  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
598 }
599 
600 /// Copies the 8-bit integers from a 64-bit integer vector to the
601 /// destination or clears 8-bit values in the destination, as specified by
602 /// the second source operand.
603 ///
604 /// \headerfile <x86intrin.h>
605 ///
606 /// This intrinsic corresponds to the \c PSHUFB instruction.
607 ///
608 /// \param __a
609 /// A 64-bit integer vector containing the values to be copied.
610 /// \param __b
611 /// A 64-bit integer vector containing control bytes corresponding to
612 /// positions in the destination:
613 /// Bit 7: \n
614 /// 1: Clear the corresponding byte in the destination. \n
615 /// 0: Copy the selected source byte to the corresponding byte in the
616 /// destination. \n
617 /// Bits [2:0] select the source byte to be copied.
618 /// \returns A 64-bit integer vector containing the copied or cleared values.
619 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
620 _mm_shuffle_pi8(__m64 __a, __m64 __b)
621 {
622  return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
623 }
624 
625 /// For each 8-bit integer in the first source operand, perform one of
626 /// the following actions as specified by the second source operand.
627 ///
628 /// If the byte in the second source is negative, calculate the two's
629 /// complement of the corresponding byte in the first source, and write that
630 /// value to the destination. If the byte in the second source is positive,
631 /// copy the corresponding byte from the first source to the destination. If
632 /// the byte in the second source is zero, clear the corresponding byte in
633 /// the destination.
634 ///
635 /// \headerfile <x86intrin.h>
636 ///
637 /// This intrinsic corresponds to the \c VPSIGNB instruction.
638 ///
639 /// \param __a
640 /// A 128-bit integer vector containing the values to be copied.
641 /// \param __b
642 /// A 128-bit integer vector containing control bytes corresponding to
643 /// positions in the destination.
644 /// \returns A 128-bit integer vector containing the resultant values.
645 static __inline__ __m128i __DEFAULT_FN_ATTRS
646 _mm_sign_epi8(__m128i __a, __m128i __b)
647 {
648  return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
649 }
650 
651 /// For each 16-bit integer in the first source operand, perform one of
652 /// the following actions as specified by the second source operand.
653 ///
654 /// If the word in the second source is negative, calculate the two's
655 /// complement of the corresponding word in the first source, and write that
656 /// value to the destination. If the word in the second source is positive,
657 /// copy the corresponding word from the first source to the destination. If
658 /// the word in the second source is zero, clear the corresponding word in
659 /// the destination.
660 ///
661 /// \headerfile <x86intrin.h>
662 ///
663 /// This intrinsic corresponds to the \c VPSIGNW instruction.
664 ///
665 /// \param __a
666 /// A 128-bit integer vector containing the values to be copied.
667 /// \param __b
668 /// A 128-bit integer vector containing control words corresponding to
669 /// positions in the destination.
670 /// \returns A 128-bit integer vector containing the resultant values.
671 static __inline__ __m128i __DEFAULT_FN_ATTRS
672 _mm_sign_epi16(__m128i __a, __m128i __b)
673 {
674  return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
675 }
676 
677 /// For each 32-bit integer in the first source operand, perform one of
678 /// the following actions as specified by the second source operand.
679 ///
680 /// If the doubleword in the second source is negative, calculate the two's
681 /// complement of the corresponding doubleword in the first source, and
682 /// write that value to the destination. If the doubleword in the second
683 /// source is positive, copy the corresponding doubleword from the first
684 /// source to the destination. If the doubleword in the second source is
685 /// zero, clear the corresponding doubleword in the destination.
686 ///
687 /// \headerfile <x86intrin.h>
688 ///
689 /// This intrinsic corresponds to the \c VPSIGND instruction.
690 ///
691 /// \param __a
692 /// A 128-bit integer vector containing the values to be copied.
693 /// \param __b
694 /// A 128-bit integer vector containing control doublewords corresponding to
695 /// positions in the destination.
696 /// \returns A 128-bit integer vector containing the resultant values.
697 static __inline__ __m128i __DEFAULT_FN_ATTRS
698 _mm_sign_epi32(__m128i __a, __m128i __b)
699 {
700  return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
701 }
702 
703 /// For each 8-bit integer in the first source operand, perform one of
704 /// the following actions as specified by the second source operand.
705 ///
706 /// If the byte in the second source is negative, calculate the two's
707 /// complement of the corresponding byte in the first source, and write that
708 /// value to the destination. If the byte in the second source is positive,
709 /// copy the corresponding byte from the first source to the destination. If
710 /// the byte in the second source is zero, clear the corresponding byte in
711 /// the destination.
712 ///
713 /// \headerfile <x86intrin.h>
714 ///
715 /// This intrinsic corresponds to the \c PSIGNB instruction.
716 ///
717 /// \param __a
718 /// A 64-bit integer vector containing the values to be copied.
719 /// \param __b
720 /// A 64-bit integer vector containing control bytes corresponding to
721 /// positions in the destination.
722 /// \returns A 64-bit integer vector containing the resultant values.
723 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
724 _mm_sign_pi8(__m64 __a, __m64 __b)
725 {
726  return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
727 }
728 
729 /// For each 16-bit integer in the first source operand, perform one of
730 /// the following actions as specified by the second source operand.
731 ///
732 /// If the word in the second source is negative, calculate the two's
733 /// complement of the corresponding word in the first source, and write that
734 /// value to the destination. If the word in the second source is positive,
735 /// copy the corresponding word from the first source to the destination. If
736 /// the word in the second source is zero, clear the corresponding word in
737 /// the destination.
738 ///
739 /// \headerfile <x86intrin.h>
740 ///
741 /// This intrinsic corresponds to the \c PSIGNW instruction.
742 ///
743 /// \param __a
744 /// A 64-bit integer vector containing the values to be copied.
745 /// \param __b
746 /// A 64-bit integer vector containing control words corresponding to
747 /// positions in the destination.
748 /// \returns A 64-bit integer vector containing the resultant values.
749 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
750 _mm_sign_pi16(__m64 __a, __m64 __b)
751 {
752  return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
753 }
754 
755 /// For each 32-bit integer in the first source operand, perform one of
756 /// the following actions as specified by the second source operand.
757 ///
758 /// If the doubleword in the second source is negative, calculate the two's
759 /// complement of the corresponding doubleword in the first source, and
760 /// write that value to the destination. If the doubleword in the second
761 /// source is positive, copy the corresponding doubleword from the first
762 /// source to the destination. If the doubleword in the second source is
763 /// zero, clear the corresponding doubleword in the destination.
764 ///
765 /// \headerfile <x86intrin.h>
766 ///
767 /// This intrinsic corresponds to the \c PSIGND instruction.
768 ///
769 /// \param __a
770 /// A 64-bit integer vector containing the values to be copied.
771 /// \param __b
772 /// A 64-bit integer vector containing two control doublewords corresponding
773 /// to positions in the destination.
774 /// \returns A 64-bit integer vector containing the resultant values.
775 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
776 _mm_sign_pi32(__m64 __a, __m64 __b)
777 {
778  return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
779 }
780 
/* Remove the helper attribute macros defined at the top of this header. */
#undef __DEFAULT_FN_ATTRS_MMX
#undef __DEFAULT_FN_ATTRS
783 
784 #endif /* __TMMINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ void int __a
Definition: emmintrin.h:4057
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:672
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition: tmmintrin.h:223
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:750
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:620
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:390
#define __DEFAULT_FN_ATTRS
Definition: tmmintrin.h:20
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:367
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:59
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:549
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:529
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition: tmmintrin.h:269
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:95
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:499
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:41
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:646
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition: tmmintrin.h:439
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:77
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition: tmmintrin.h:321
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:724
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:776
#define __DEFAULT_FN_ATTRS_MMX
Definition: tmmintrin.h:23
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:569
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:246
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition: tmmintrin.h:465
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition: tmmintrin.h:295
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:698
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:200
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:595
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:413
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:131
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:113